updates and things

This commit is contained in:
Joseph Redmon 2016-09-01 16:48:41 -07:00
parent aebe937710
commit 8f1b4e0962
45 changed files with 44944 additions and 2747 deletions

1
.gitignore vendored
View File

@ -14,6 +14,7 @@ decaf/
submission/
cfg/
darknet
.fuse*
# OS Generated #
.DS_Store*

View File

@ -1,6 +1,6 @@
GPU=1
CUDNN=1
OPENCV=1
GPU=0
CUDNN=0
OPENCV=0
DEBUG=0
ARCH= --gpu-architecture=compute_52 --gpu-code=compute_52
@ -41,7 +41,7 @@ CFLAGS+= -DCUDNN
LDFLAGS+= -lcudnn
endif
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
ifeq ($(GPU), 1)
LDFLAGS+= -lstdc++
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o

209
cfg/extraction22k.cfg Normal file
View File

@ -0,0 +1,209 @@
[net]
batch=128
subdivisions=1
height=224
width=224
max_crop=320
channels=3
momentum=0.9
decay=0.0005
learning_rate=0.01
max_batches = 0
policy=steps
steps=444000,590000,970000
scales=.5,.2,.1
#policy=sigmoid
#gamma=.00008
#step=100000
#max_batches=200000
[convolutional]
batch_normalize=1
filters=64
size=7
stride=2
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=192
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=2048
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=2048
size=3
stride=1
pad=1
activation=leaky
[avgpool]
[connected]
output=21842
activation=leaky
[softmax]
groups=1
[cost]
type=sse

View File

@ -3,102 +3,126 @@ batch=1
subdivisions=1
height=19
width=19
channels=8
channels=1
momentum=0.9
decay=0.0005
learning_rate=0.1
max_batches = 0
policy=steps
steps=50000
scales=.1
policy=poly
power=4
max_batches=400000
[convolutional]
filters=512
filters=192
size=3
stride=1
pad=1
activation=leaky
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky
batch_normalize=1
[convolutional]
filters=512
filters=192
size=3
stride=1
pad=1
activation=leaky
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky
batch_normalize=1
[convolutional]
filters=512
filters=192
size=3
stride=1
pad=1
activation=leaky
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky
batch_normalize=1
[convolutional]
filters=512
filters=192
size=3
stride=1
pad=1
activation=leaky
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky
batch_normalize=1
[convolutional]
filters=512
filters=192
size=3
stride=1
pad=1
activation=leaky
activation=relu
batch_normalize=1
[convolutional]
filters=256
size=1
filters=192
size=3
stride=1
pad=1
activation=leaky
activation=relu
batch_normalize=1
[convolutional]
filters=192
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=192
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=192
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=192
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=192
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=192
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=192
size=3
stride=1
pad=1
activation=relu
batch_normalize=1
[convolutional]
filters=1
size=1
stride=1
pad=1
activation=leaky
activation=linear
[softmax]

View File

@ -1,9 +1,8 @@
classes=1000
labels = data/inet.labels.list
names = data/shortnames.txt
train = /data/imagenet/imagenet1k.train.list
valid = /data/imagenet/imagenet1k.valid.list
top=5
test = /Users/pjreddie/Documents/sites/selfie/paths.list
train = /data/imagenet/imagenet1k.train.list
valid = /data/imagenet/imagenet1k.valid.list
backup = /home/pjreddie/backup/
labels = data/imagenet.labels.list
names = data/imagenet.shortnames.list
top=5

View File

@ -1,11 +1,14 @@
[net]
batch=64
subdivisions=2
batch=1
subdivisions=1
height=448
width=448
channels=3
momentum=0.9
decay=0.0005
saturation=1.5
exposure=1.5
hue=.1
learning_rate=0.0005
policy=steps
@ -13,15 +16,6 @@ steps=200,400,600,20000,30000
scales=2.5,2,2,.1,.1
max_batches = 40000
[crop]
crop_width=448
crop_height=448
flip=0
angle=0
saturation = 1.5
exposure = 1.5
noadjust=1
[convolutional]
batch_normalize=1
filters=64

257
cfg/yolo.train.cfg Normal file
View File

@ -0,0 +1,257 @@
[net]
batch=64
subdivisions=4
height=448
width=448
channels=3
momentum=0.9
decay=0.0005
saturation=1.5
exposure=1.5
hue=.1
learning_rate=0.0005
policy=steps
steps=200,400,600,20000,30000
scales=2.5,2,2,.1,.1
max_batches = 40000
[convolutional]
batch_normalize=1
filters=64
size=7
stride=2
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=192
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
#######
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=2
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[local]
size=3
stride=1
pad=1
filters=256
activation=leaky
[dropout]
probability=.5
[connected]
output= 1715
activation=linear
[detection]
classes=20
coords=4
rescore=1
side=7
num=3
softmax=0
sqrt=1
jitter=.2
object_scale=1
noobject_scale=.5
class_scale=1
coord_scale=5

Binary file not shown.

Before

Width:  |  Height:  |  Size: 160 KiB

After

Width:  |  Height:  |  Size: 160 KiB

21842
data/imagenet.labels.list Normal file

File diff suppressed because it is too large Load Diff

21842
data/imagenet.shortnames.list Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -40,6 +40,7 @@ void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float
void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY);
void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
void scal_ongpu(int N, float ALPHA, float * X, int INCX);
void supp_ongpu(int N, float ALPHA, float * X, int INCX);
void mask_ongpu(int N, float * X, float mask_num, float * mask);
void const_ongpu(int N, float ALPHA, float *X, int INCX);
void pow_ongpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);

View File

@ -368,6 +368,14 @@ __global__ void constrain_kernel(int N, float ALPHA, float *X, int INCX)
if(i < N) X[i*INCX] = min(ALPHA, max(-ALPHA, X[i*INCX]));
}
/*
 * Suppress small entries of a strided vector in place.
 *
 * Each thread handles one logical index and looks at element X[index*INCX].
 * If the square of that element is strictly smaller than ALPHA squared the
 * element is overwritten with 0; otherwise it is left untouched.
 *
 * N:     number of logical elements to visit; threads whose index is >= N exit.
 * ALPHA: threshold; only its square is used, so its sign does not matter.
 * X:     vector to modify.
 * INCX:  stride, in elements, between consecutive logical entries of X.
 */
__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX)
{
    int idx = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(idx >= N) return;

    float val = X[idx*INCX];
    if((val * val) < (ALPHA * ALPHA)) X[idx*INCX] = 0;
}
__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX)
{
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
@ -552,6 +560,12 @@ extern "C" void scal_ongpu(int N, float ALPHA, float * X, int INCX)
check_error(cudaPeekAtLastError());
}
/*
 * Host-side launcher for supp_kernel.
 *
 * Launches supp_kernel over N elements using a grid from cuda_gridsize(N) and
 * BLOCK threads per block, forwarding N, ALPHA, X and INCX unchanged, then
 * hands the result of cudaPeekAtLastError() to check_error().
 *
 * N:     number of elements the kernel should visit.
 * ALPHA: threshold forwarded to the kernel.
 * X:     vector modified by the kernel (presumably a device pointer, as it is
 *        passed straight into a kernel launch -- confirm at call sites).
 * INCX:  element stride forwarded to the kernel.
 */
extern "C" void supp_ongpu(int N, float ALPHA, float * X, int INCX)
{
supp_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
/* Only the launch status is inspected here; no explicit synchronization is done. */
check_error(cudaPeekAtLastError());
}
extern "C" void fill_ongpu(int N, float ALPHA, float * X, int INCX)
{
fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
@ -633,6 +647,7 @@ extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *er
}
__global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *c)
{
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;

View File

@ -39,6 +39,18 @@ list *read_data_cfg(char *filename)
return options;
}
/*
 * Extract a regression target from each "<name> <value>" label string.
 *
 * Every labels[i] is split in place at its first space: the space is
 * overwritten with '\0', so afterwards the string holds only the part before
 * it, and the text after the space is converted with atof().
 * A label that contains no space has no value part; it is left unmodified and
 * its entry in the result stays 0 (previously this dereferenced NULL).
 *
 * labels: n writable, NUL-terminated strings (they are modified).
 * n:      number of labels.
 * Returns a calloc'd array of n floats that the caller must free(), or NULL
 * when the allocation fails.
 */
float *get_regression_values(char **labels, int n)
{
    float *v = calloc(n, sizeof(float));
    int i;
    if(!v) return 0;
    for(i = 0; i < n; ++i){
        char *p = strchr(labels[i], ' ');
        if(!p) continue;    /* no separator: keep the zero from calloc */
        *p = 0;
        v[i] = atof(p+1);
    }
    return v;
}
void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
{
int nthreads = 8;
@ -85,6 +97,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
args.angle = net.angle;
args.exposure = net.exposure;
args.saturation = net.saturation;
args.hue = net.hue;
args.size = net.w;
args.paths = paths;
@ -116,6 +129,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
printf("Loaded: %lf seconds\n", sec(clock()-time));
time=clock();
#ifdef OPENCV
if(0){
int u;
for(u = 0; u < imgs; ++u){
@ -124,6 +138,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
cvWaitKey(0);
}
}
#endif
float loss = train_network(net, train);
if(avg_loss == -1) avg_loss = loss;
@ -440,7 +455,7 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
char **labels = get_labels(label_list);
list *plist = get_paths(valid_list);
int scales[] = {192, 224, 288, 320, 352};
int scales[] = {224, 288, 320, 352, 384};
int nscales = sizeof(scales)/sizeof(scales[0]);
char **paths = (char **)list_to_array(plist);
@ -484,6 +499,88 @@ void validate_classifier_multi(char *datacfg, char *filename, char *weightfile)
}
}
/*
 * Run a classifier on one image (or interactively on many) and dump the
 * internals of a chosen layer alongside the top predictions.
 *
 * For each image the function loads it, resizes it with resize_min(.., 256),
 * takes a 224x224 crop, normalizes it with fixed per-channel mean/std values,
 * runs network_predict(), then prints:
 *   - for layer `layer_num`, rolling_mean / rolling_variance / scales for the
 *     first l.c entries (only when rolling_mean is non-NULL),
 *   - every value of that layer's output,
 *   - the `top` highest-scoring class names with their scores.
 *
 * datacfg:    data config; "names" (falling back to "labels") and "top" are read.
 * cfgfile:    network definition passed to parse_network_cfg().
 * weightfile: optional weights; skipped when NULL.
 * filename:   image to classify; when NULL, paths are read from stdin until EOF.
 * layer_num:  index into net.layers of the layer to dump.
 *             NOTE(review): it is not range-checked here -- callers must pass
 *             a valid index.
 */
void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num)
{
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);
    srand(2222222);

    list *options = read_data_cfg(datacfg);

    char *name_list = option_find_str(options, "names", 0);
    if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
    int top = option_find_int(options, "top", 1);

    int i = 0;
    char **names = get_labels(name_list);
    clock_t time;
    int *indexes = calloc(top, sizeof(int));
    char buff[256];
    char *input = buff;
    while(1){
        if(filename){
            /* snprintf always NUL-terminates; strncpy left the buffer
             * unterminated for paths of 256 bytes or more. */
            snprintf(input, sizeof(buff), "%s", filename);
        }else{
            printf("Enter Image Path: ");
            fflush(stdout);
            input = fgets(input, 256, stdin);
            if(!input) break;           /* EOF or read error: stop prompting */
            strtok(input, "\n");        /* strip the trailing newline */
        }
        image orig = load_image_color(input, 0, 0);
        image r = resize_min(orig, 256);
        image im = crop_image(r, (r.w - 224 - 1)/2 + 1, (r.h - 224 - 1)/2 + 1, 224, 224);
        /* NOTE(review): orig and r are never released in this function; confirm
         * their ownership and free them if they are separate allocations. */
        float mean[] = {0.48263312050943, 0.45230225481413, 0.40099074308742};
        float std[] = {0.22590347483426, 0.22120921437787, 0.22103996251583};
        float var[3];
        /* normalize_cpu is given variances, so square the standard deviations. */
        var[0] = std[0]*std[0];
        var[1] = std[1]*std[1];
        var[2] = std[2]*std[2];

        normalize_cpu(im.data, mean, var, 1, 3, im.w*im.h);

        float *X = im.data;
        time=clock();
        float *predictions = network_predict(net, X);

        layer l = net.layers[layer_num];
        for(i = 0; i < l.c; ++i){
            if(l.rolling_mean) printf("%f %f %f\n", l.rolling_mean[i], l.rolling_variance[i], l.scales[i]);
        }
#ifdef GPU
        /* Copy the layer output into l.output before printing it. */
        cuda_pull_array(l.output_gpu, l.output, l.outputs);
#endif
        for(i = 0; i < l.outputs; ++i){
            printf("%f\n", l.output[i]);
        }

        top_predictions(net, top, indexes);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        for(i = 0; i < top; ++i){
            int index = indexes[i];
            printf("%s: %f\n", names[index], predictions[index]);
        }
        free_image(im);
        if (filename) break;
    }
    free(indexes);
}
void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename)
{
network net = parse_network_cfg(cfgfile);
@ -649,6 +746,127 @@ void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_
}
/*
 * Live "threat meter" demo: classify frames from a camera or video file, keep
 * a smoothed threat score, draw a gauge onto each frame and save it to disk.
 *
 * Per frame the function:
 *   - grabs a frame and resizes a copy to the network input size,
 *   - computes curr_threat = 0*p[0] + .6*p[1] + p[2] from the predictions and
 *     folds it into `threat` as an exponential moving average (weight .2),
 *   - draws marker boxes at the .97 and .57 levels (filled in colour when the
 *     smoothed score exceeds them) and a vertical bar of height threat*h whose
 *     colour components (r, g, 0) vary with height,
 *   - saves the annotated frame to /home/pjreddie/tmp/threat_NNNNNN and prints
 *     the running FPS and the `top` predictions.
 * The loop ends when a frame has no data.
 *
 * datacfg:    data config; "top" and "names" are read from it.
 * cfgfile:    network definition passed to parse_network_cfg().
 * weightfile: optional weights; skipped when NULL.
 * cam_index:  camera index used when filename is NULL.
 * filename:   video file to read instead of a camera, or NULL.
 *
 * Compiles to an empty function unless OPENCV is defined.
 */
void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
{
#ifdef OPENCV
    float threat = 0;
    float roll = .2;

    printf("Classifier Demo\n");
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);
    list *options = read_data_cfg(datacfg);

    srand(2222222);
    CvCapture * cap;

    if(filename){
        cap = cvCaptureFromFile(filename);
    }else{
        cap = cvCaptureFromCAM(cam_index);
    }

    int top = option_find_int(options, "top", 1);

    char *name_list = option_find_str(options, "names", 0);
    char **names = get_labels(name_list);

    int *indexes = calloc(top, sizeof(int));

    if(!cap) error("Couldn't connect to webcam.\n");
    //cvNamedWindow("Threat", CV_WINDOW_NORMAL);
    //cvResizeWindow("Threat", 512, 512);
    float fps = 0;
    int i;

    int count = 0;

    while(1){
        ++count;
        struct timeval tval_before, tval_after, tval_result;
        gettimeofday(&tval_before, NULL);

        image in = get_image_from_stream(cap);
        if(!in.data) break;
        image in_s = resize_image(in, net.w, net.h);

        /* out is a struct copy of in, so drawing on out annotates the frame
         * that is saved below; only in is passed to free_image afterwards. */
        image out = in;
        int x1 = out.w / 20;
        int y1 = out.h / 20;
        int x2 = 2*x1;
        int y2 = out.h - out.h/20;

        int border = .01*out.h;
        int h = y2 - y1 - 2*border;
        int w = x2 - x1 - 2*border;

        float *predictions = network_predict(net, in_s.data);
        float curr_threat = predictions[0] * 0 + predictions[1] * .6 + predictions[2];
        threat = roll * curr_threat + (1-roll) * threat;

        /* Upper marker: tick line, coloured fill above .97, then outline. */
        draw_box_width(out, x2 + border, y1 + .02*h, x2 + .5 * w, y1 + .02*h + border, border, 0,0,0);
        if(threat > .97) {
            draw_box_width(out, x2 + .5 * w + border,
                    y1 + .02*h - 2*border,
                    x2 + .5 * w + 6*border,
                    y1 + .02*h + 3*border, 3*border, 1,0,0);
        }
        draw_box_width(out, x2 + .5 * w + border,
                y1 + .02*h - 2*border,
                x2 + .5 * w + 6*border,
                y1 + .02*h + 3*border, .5*border, 0,0,0);
        /* Lower marker: same layout, filled above .57. */
        draw_box_width(out, x2 + border, y1 + .42*h, x2 + .5 * w, y1 + .42*h + border, border, 0,0,0);
        if(threat > .57) {
            draw_box_width(out, x2 + .5 * w + border,
                    y1 + .42*h - 2*border,
                    x2 + .5 * w + 6*border,
                    y1 + .42*h + 3*border, 3*border, 1,1,0);
        }
        draw_box_width(out, x2 + .5 * w + border,
                y1 + .42*h - 2*border,
                x2 + .5 * w + 6*border,
                y1 + .42*h + 3*border, .5*border, 0,0,0);

        /* Gauge frame, then one 1-pixel row per unit of threat*h, bottom up. */
        draw_box_width(out, x1, y1, x2, y2, border, 0,0,0);
        for(i = 0; i < threat * h ; ++i){
            float ratio = (float) i / h;
            float r = (ratio < .5) ? (2*(ratio)) : 1;
            float g = (ratio < .5) ? 1 : 1 - 2*(ratio - .5);
            draw_box_width(out, x1 + border, y2 - border - i, x2 - border, y2 - border - i, 1, r, g, 0);
        }
        top_predictions(net, top, indexes);
        char buff[256];
        sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count);
        save_image(out, buff);

        /* ANSI escapes: clear the screen and move the cursor to the top-left. */
        printf("\033[2J");
        printf("\033[1;1H");
        printf("\nFPS:%.0f\n",fps);

        for(i = 0; i < top; ++i){
            int index = indexes[i];
            printf("%.1f%%: %s\n", predictions[index]*100, names[index]);
        }

        if(0){
            show_image(out, "Threat");
            cvWaitKey(10);
        }

        free_image(in_s);
        free_image(in);

        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        /* Use the whole elapsed time (seconds and microseconds). Using tv_usec
         * alone mis-reports frames that take over a second and divides by
         * zero when the microsecond part is 0. */
        long int elapsed_usec = (long int)tval_result.tv_sec * 1000000L + (long int)tval_result.tv_usec;
        if(elapsed_usec > 0){
            float curr = 1000000.f/elapsed_usec;
            fps = .9*fps + .1*curr;
        }
    }
#endif
}
void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
{
#ifdef OPENCV
@ -732,8 +950,10 @@ void run_classifier(int argc, char **argv)
char *layer_s = (argc > 7) ? argv[7]: 0;
int layer = layer_s ? atoi(layer_s) : -1;
if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename);
else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s));
else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, clear);
else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename);
else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename);
else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer);
else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights);
else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights);

View File

@ -25,6 +25,7 @@ void train_coco(char *cfgfile, char *weightfile)
//char *train_images = "/home/pjreddie/data/voc/test/train.txt";
//char *train_images = "/home/pjreddie/data/coco/train.txt";
char *train_images = "data/coco.trainval.txt";
//char *train_images = "data/bags.train.list";
char *backup_directory = "/home/pjreddie/backup/";
srand(time(0));
data_seed = time(0);
@ -63,6 +64,11 @@ void train_coco(char *cfgfile, char *weightfile)
args.d = &buffer;
args.type = REGION_DATA;
args.angle = net.angle;
args.exposure = net.exposure;
args.saturation = net.saturation;
args.hue = net.hue;
pthread_t load_thread = load_data_in_thread(args);
clock_t time;
//while(i*imgs < N*120){
@ -94,6 +100,11 @@ void train_coco(char *cfgfile, char *weightfile)
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
save_weights(net, buff);
}
if(i%100==0){
char buff[256];
sprintf(buff, "%s/%s.backup", backup_directory, base);
save_weights(net, buff);
}
free_data(train);
}
char buff[256];

View File

@ -16,13 +16,9 @@ void col2im_cpu(float* data_col,
int ksize, int stride, int pad, float* data_im)
{
int c,h,w;
int height_col = (height - ksize) / stride + 1;
int width_col = (width - ksize) / stride + 1;
if (pad){
height_col = 1 + (height-1) / stride;
width_col = 1 + (width-1) / stride;
pad = ksize/2;
}
int height_col = (height + 2*pad - ksize) / stride + 1;
int width_col = (width + 2*pad - ksize) / stride + 1;
int channels_col = channels * ksize * ksize;
for (c = 0; c < channels_col; ++c) {
int w_offset = c % ksize;

View File

@ -46,7 +46,6 @@ void col2im_ongpu(float *data_col,
int ksize, int stride, int pad, float *data_im){
// We are going to launch channels * height_col * width_col kernels, each
// kernel responsible for copying a single-channel grid.
pad = pad ? ksize/2 : 0;
int height_col = (height + 2 * pad - ksize) / stride + 1;
int width_col = (width + 2 * pad - ksize) / stride + 1;
int num_kernels = channels * height * width;

View File

@ -17,7 +17,7 @@ __global__ void binarize_kernel(float *x, int n, float *binary)
{
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if (i >= n) return;
binary[i] = (x[i] > 0) ? 1 : -1;
binary[i] = (x[i] >= 0) ? 1 : -1;
}
void binarize_gpu(float *x, int n, float *binary)
@ -60,6 +60,7 @@ __global__ void binarize_filters_kernel(float *filters, int n, int size, float *
mean = mean / size;
for(i = 0; i < size; ++i){
binary[f*size + i] = (filters[f*size + i] > 0) ? mean : -mean;
//binary[f*size + i] = filters[f*size + i];
}
}

View File

@ -70,18 +70,12 @@ void binarize_input(float *input, int n, int size, float *binary)
int convolutional_out_height(convolutional_layer l)
{
int h = l.h;
if (!l.pad) h -= l.size;
else h -= 1;
return h/l.stride + 1;
return (l.h + 2*l.pad - l.size) / l.stride + 1;
}
int convolutional_out_width(convolutional_layer l)
{
int w = l.w;
if (!l.pad) w -= l.size;
else w -= 1;
return w/l.stride + 1;
return (l.w + 2*l.pad - l.size) / l.stride + 1;
}
image get_convolutional_image(convolutional_layer l)
@ -148,8 +142,7 @@ void cudnn_convolutional_setup(layer *l)
cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
cudnnSetFilter4dDescriptor(l->filterDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size);
int padding = l->pad ? l->size/2 : 0;
cudnnSetConvolution2dDescriptor(l->convDesc, padding, padding, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
l->srcTensorDesc,
l->filterDesc,
@ -178,7 +171,7 @@ void cudnn_convolutional_setup(layer *l)
#endif
#endif
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalize, int binary, int xnor)
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor)
{
int i;
convolutional_layer l = {0};
@ -193,7 +186,7 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
l.batch = batch;
l.stride = stride;
l.size = size;
l.pad = pad;
l.pad = padding;
l.batch_normalize = batch_normalize;
l.filters = calloc(c*n*size*size, sizeof(float));

View File

@ -98,6 +98,15 @@ void push_cost_layer(cost_layer l)
cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs);
}
/*
 * qsort()-style comparator that orders floats by ascending magnitude.
 *
 * a, b: pointers to the two float elements being compared.
 * Returns 1 when |*a| > |*b|, -1 when |*a| < |*b|, and 0 otherwise.
 */
int float_abs_compare (const void * a, const void * b)
{
    const float lhs = *(const float *)a;
    const float rhs = *(const float *)b;
    const float lhs_mag = (lhs < 0) ? -lhs : lhs;
    const float rhs_mag = (rhs < 0) ? -rhs : rhs;

    if(lhs_mag > rhs_mag) return 1;
    if(lhs_mag < rhs_mag) return -1;
    return 0;
}
void forward_cost_layer_gpu(cost_layer l, network_state state)
{
if (!state.truth) return;
@ -111,6 +120,16 @@ void forward_cost_layer_gpu(cost_layer l, network_state state)
l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
}
if(l.ratio){
cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare);
int n = (1-l.ratio) * l.batch*l.inputs;
float thresh = l.delta[n];
thresh = 0;
printf("%f\n", thresh);
supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1);
}
cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs);
l.cost[0] = sum_array(l.output, l.batch*l.inputs);
}

View File

@ -13,7 +13,6 @@
#endif
extern void run_voxel(int argc, char **argv);
extern void run_imagenet(int argc, char **argv);
extern void run_yolo(int argc, char **argv);
extern void run_detector(int argc, char **argv);
extern void run_coco(int argc, char **argv);
@ -327,9 +326,7 @@ int main(int argc, char **argv)
}
#endif
if(0==strcmp(argv[1], "imagenet")){
run_imagenet(argc, argv);
} else if (0 == strcmp(argv[1], "average")){
if (0 == strcmp(argv[1], "average")){
average(argc, argv);
} else if (0 == strcmp(argv[1], "yolo")){
run_yolo(argc, argv);

View File

@ -100,7 +100,7 @@ matrix load_image_paths(char **paths, int n, int w, int h)
return X;
}
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float exposure, float saturation)
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure)
{
int i;
matrix X;
@ -113,10 +113,7 @@ matrix load_image_augment_paths(char **paths, int n, int min, int max, int size,
image crop = random_augment_image(im, angle, min, max, size);
int flip = rand_r(&data_seed)%2;
if (flip) flip_image(crop);
float exp = rand_uniform(1./exposure, exposure);
float sat = rand_uniform(1./saturation, saturation);
exposure_image(crop, exp);
exposure_image(crop, sat);
random_distort_image(crop, hue, saturation, exposure);
/*
show_image(im, "orig");
@ -241,6 +238,7 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int
labelpath = find_replace(labelpath, "JPEGImages", "labels");
labelpath = find_replace(labelpath, ".jpg", ".txt");
labelpath = find_replace(labelpath, ".png", ".txt");
labelpath = find_replace(labelpath, ".JPG", ".txt");
labelpath = find_replace(labelpath, ".JPEG", ".txt");
int count = 0;
@ -287,6 +285,7 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
labelpath = find_replace(labelpath, "JPEGImages", "labels");
labelpath = find_replace(labelpath, ".jpg", ".txt");
labelpath = find_replace(labelpath, ".png", ".txt");
labelpath = find_replace(labelpath, ".JPG", ".txt");
labelpath = find_replace(labelpath, ".JPEG", ".txt");
int count = 0;
@ -443,7 +442,7 @@ void free_data(data d)
}
}
data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter)
data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter, float hue, float saturation, float exposure)
{
char **random_paths = get_random_paths(paths, n, m);
int i;
@ -485,6 +484,7 @@ data load_data_region(int n, char **paths, int m, int w, int h, int size, int cl
image sized = resize_image(cropped, w, h);
if(flip) flip_image(sized);
random_distort_image(sized, hue, saturation, exposure);
d.X.vals[i] = sized.data;
fill_truth_region(random_paths[i], d.y.vals[i], classes, size, flip, dx, dy, 1./sx, 1./sy);
@ -611,7 +611,7 @@ data load_data_swag(char **paths, int n, int classes, float jitter)
return d;
}
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter)
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure)
{
char **random_paths = get_random_paths(paths, n, m);
int i;
@ -651,6 +651,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in
image sized = resize_image(cropped, w, h);
if(flip) flip_image(sized);
random_distort_image(sized, hue, saturation, exposure);
d.X.vals[i] = sized.data;
fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy);
@ -679,17 +680,17 @@ void *load_thread(void *ptr)
if (a.type == OLD_CLASSIFICATION_DATA){
*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
} else if (a.type == CLASSIFICATION_DATA){
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
} else if (a.type == SUPER_DATA){
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
} else if (a.type == STUDY_DATA){
*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
} else if (a.type == WRITING_DATA){
*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
} else if (a.type == REGION_DATA){
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter);
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
} else if (a.type == DETECTION_DATA){
*a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter);
*a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
} else if (a.type == SWAG_DATA){
*a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
} else if (a.type == COMPARE_DATA){
@ -698,7 +699,7 @@ void *load_thread(void *ptr)
*(a.im) = load_image_color(a.path, 0, 0);
*(a.resized) = resize_image(*(a.im), a.w, a.h);
} else if (a.type == TAG_DATA){
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
//*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
}
free(ptr);
@ -740,13 +741,13 @@ data load_data(char **paths, int n, int m, char **labels, int k, int w, int h)
return d;
}
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation)
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
{
data d = {0};
d.indexes = calloc(n, sizeof(int));
if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes);
d.shallow = 0;
d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
d.y = load_labels_paths(paths, n, labels, k);
if(m) free(paths);
return d;
@ -782,25 +783,25 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
return d;
}
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation)
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
{
if(m) paths = get_random_paths(paths, n, m);
data d = {0};
d.shallow = 0;
d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
d.y = load_labels_paths(paths, n, labels, k);
if(m) free(paths);
return d;
}
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation)
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
{
if(m) paths = get_random_paths(paths, n, m);
data d = {0};
d.w = size;
d.h = size;
d.shallow = 0;
d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
d.y = load_tags_paths(paths, n, k);
if(m) free(paths);
return d;

View File

@ -54,6 +54,7 @@ typedef struct load_args{
float angle;
float saturation;
float exposure;
float hue;
data *d;
image *im;
image *resized;
@ -74,11 +75,12 @@ void print_letters(float *pred, int n);
data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
data load_data(char **paths, int n, int m, char **labels, int k, int w, int h);
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter);
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation);
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure);
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure);
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
data load_go(char *filename);
box_label *read_boxes(char *filename, int *n);

View File

@ -8,7 +8,7 @@
#include "demo.h"
#include <sys/time.h>
#define FRAMES 1
#define FRAMES 3
#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
@ -48,7 +48,7 @@ void *fetch_in_thread(void *ptr)
void *detect_in_thread(void *ptr)
{
float nms = .4;
float nms = .1;
detection_layer l = net.layers[net.n-1];
float *X = det_s.data;
@ -153,13 +153,19 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");
show_image(disp, "Demo");
int c = cvWaitKey(1);
if (c == 10){
if(frame_skip == 0) frame_skip = 60;
else if(frame_skip == 4) frame_skip = 0;
else if(frame_skip == 60) frame_skip = 4;
else frame_skip = 0;
if(1){
show_image(disp, "Demo");
int c = cvWaitKey(1);
if (c == 10){
if(frame_skip == 0) frame_skip = 60;
else if(frame_skip == 4) frame_skip = 0;
else if(frame_skip == 60) frame_skip = 4;
else frame_skip = 0;
}
}else{
char buff[256];
sprintf(buff, "/home/pjreddie/tmp/bag_%07d", count);
save_image(disp, buff);
}
pthread_join(fetch_thread, 0);

View File

@ -22,6 +22,8 @@ detection_layer make_detection_layer(int batch, int inputs, int n, int side, int
l.coords = coords;
l.rescore = rescore;
l.side = side;
l.w = side;
l.h = side;
assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs);
l.cost = calloc(1, sizeof(float));
l.outputs = l.inputs;
@ -44,6 +46,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
int locations = l.side*l.side;
int i,j;
memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
//if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1);
int b;
if (l.softmax){
for(b = 0; b < l.batch; ++b){
@ -204,6 +207,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
//if(l.reorg) reorg(l.delta, l.w*l.h, size*l.n, l.batch, 0);
}
}

View File

@ -51,6 +51,11 @@ void train_detector(char *cfgfile, char *weightfile)
args.d = &buffer;
args.type = DETECTION_DATA;
args.angle = net.angle;
args.exposure = net.exposure;
args.saturation = net.saturation;
args.hue = net.hue;
pthread_t load_thread = load_data_in_thread(args);
clock_t time;
//while(i*imgs < N*120){

View File

@ -18,13 +18,9 @@ void im2col_cpu(float* data_im,
int ksize, int stride, int pad, float* data_col)
{
int c,h,w;
int height_col = (height - ksize) / stride + 1;
int width_col = (width - ksize) / stride + 1;
if (pad){
height_col = 1 + (height-1) / stride;
width_col = 1 + (width-1) / stride;
pad = ksize/2;
}
int height_col = (height + 2*pad - ksize) / stride + 1;
int width_col = (width + 2*pad - ksize) / stride + 1;
int channels_col = channels * ksize * ksize;
for (c = 0; c < channels_col; ++c) {
int w_offset = c % ksize;

View File

@ -33,8 +33,12 @@ __global__ void im2col_gpu_kernel(const int n, const float* data_im,
for (int j = 0; j < ksize; ++j) {
int h = h_in + i;
int w = w_in + j;
*data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ?
data_im_ptr[i * width + j] : 0;
//*data_col_ptr = data_im_ptr[ii * width + jj];
data_col_ptr += height_col * width_col;
}
}
@ -46,7 +50,6 @@ void im2col_ongpu(float *im,
int ksize, int stride, int pad, float *data_col){
// We are going to launch channels * height_col * width_col kernels, each
// kernel responsible for copying a single-channel grid.
pad = pad ? ksize/2 : 0;
int height_col = (height + 2 * pad - ksize) / stride + 1;
int width_col = (width + 2 * pad - ksize) / stride + 1;
int num_kernels = channels * height_col * width_col;
@ -56,93 +59,3 @@ void im2col_ongpu(float *im,
stride, height_col,
width_col, data_col);
}
/*
__global__ void im2col_pad_kernel(float *im,
int channels, int height, int width,
int ksize, int stride, float *data_col)
{
int c,h,w;
int height_col = 1 + (height-1) / stride;
int width_col = 1 + (width-1) / stride;
int channels_col = channels * ksize * ksize;
int pad = ksize/2;
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
int col_size = height_col*width_col*channels_col;
if (id >= col_size) return;
int col_index = id;
w = id % width_col;
id /= width_col;
h = id % height_col;
id /= height_col;
c = id % channels_col;
id /= channels_col;
int w_offset = c % ksize;
int h_offset = (c / ksize) % ksize;
int im_channel = c / ksize / ksize;
int im_row = h_offset + h * stride - pad;
int im_col = w_offset + w * stride - pad;
int im_index = im_col + width*(im_row + height*im_channel);
float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 0 : im[im_index];
data_col[col_index] = val;
}
__global__ void im2col_nopad_kernel(float *im,
int channels, int height, int width,
int ksize, int stride, float *data_col)
{
int c,h,w;
int height_col = (height - ksize) / stride + 1;
int width_col = (width - ksize) / stride + 1;
int channels_col = channels * ksize * ksize;
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
int col_size = height_col*width_col*channels_col;
if (id >= col_size) return;
int col_index = id;
w = id % width_col;
id /= width_col;
h = id % height_col;
id /= height_col;
c = id % channels_col;
id /= channels_col;
int w_offset = c % ksize;
int h_offset = (c / ksize) % ksize;
int im_channel = c / ksize / ksize;
int im_row = h_offset + h * stride;
int im_col = w_offset + w * stride;
int im_index = im_col + width*(im_row + height*im_channel);
float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 0 : im[im_index];
data_col[col_index] = val;
}
extern "C" void im2col_ongpu(float *im,
int channels, int height, int width,
int ksize, int stride, int pad, float *data_col)
{
int height_col = (height - ksize) / stride + 1;
int width_col = (width - ksize) / stride + 1;
int channels_col = channels * ksize * ksize;
if (pad){
height_col = 1 + (height-1) / stride;
width_col = 1 + (width-1) / stride;
}
size_t n = channels_col*height_col*width_col;
if(pad)im2col_pad_kernel<<<cuda_gridsize(n),BLOCK>>>(im, channels, height, width, ksize, stride, data_col);
else im2col_nopad_kernel<<<cuda_gridsize(n),BLOCK>>>(im, channels, height, width, ksize, stride, data_col);
check_error(cudaPeekAtLastError());
}
*/

View File

@ -1,6 +1,7 @@
#include "image.h"
#include "utils.h"
#include "blas.h"
#include "cuda.h"
#include <stdio.h>
#include <math.h>
@ -247,6 +248,26 @@ void constrain_image(image im)
}
/*
 * Linearly rescale every value of the image, in place, so that the global
 * minimum maps to 0 and the global maximum maps to 1. A single range is
 * shared by all channels.
 *
 * The extrema are seeded from the first element rather than from fixed
 * sentinel constants, so data lying entirely outside a hard-coded range is
 * still normalized against its true min/max.
 *
 * If the range is (almost) zero, or the image holds no values, the range
 * falls back to [0, 1], which leaves the data unchanged.
 */
void normalize_image(image p)
{
    const int count = p.h*p.w*p.c;
    float min = 0;
    float max = 1;
    int i;

    if(count > 0){
        min = p.data[0];
        max = p.data[0];
    }
    for(i = 1; i < count; ++i){
        float v = p.data[i];
        if(v < min) min = v;
        if(v > max) max = v;
    }
    /* Guard against division by (nearly) zero for constant images. */
    if(max - min < .000000001){
        min = 0;
        max = 1;
    }
    for(i = 0; i < count; ++i){
        p.data[i] = (p.data[i] - min)/(max-min);
    }
}
void normalize_image2(image p)
{
float *min = calloc(p.c, sizeof(float));
float *max = calloc(p.c, sizeof(float));
@ -320,7 +341,6 @@ void show_image_cv(image p, const char *name)
}
free_image(copy);
if(0){
//if(disp->height < 448 || disp->width < 448 || disp->height > 1000){
int w = 448;
int h = w*p.h/p.w;
if(h > 1000){
@ -334,202 +354,202 @@ void show_image_cv(image p, const char *name)
}
cvShowImage(buff, disp);
cvReleaseImage(&disp);
}
}
#endif
void show_image(image p, const char *name)
{
void show_image(image p, const char *name)
{
#ifdef OPENCV
show_image_cv(p, name);
show_image_cv(p, name);
#else
fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
save_image(p, name);
fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
save_image(p, name);
#endif
}
}
#ifdef OPENCV
image get_image_from_stream(CvCapture *cap)
{
IplImage* src = cvQueryFrame(cap);
if (!src) return make_empty_image(0,0,0);
image im = ipl_to_image(src);
rgbgr_image(im);
return im;
}
image get_image_from_stream(CvCapture *cap)
{
IplImage* src = cvQueryFrame(cap);
if (!src) return make_empty_image(0,0,0);
image im = ipl_to_image(src);
rgbgr_image(im);
return im;
}
#endif
#ifdef OPENCV
void save_image_jpg(image p, const char *name)
{
image copy = copy_image(p);
rgbgr_image(copy);
int x,y,k;
void save_image_jpg(image p, const char *name)
{
image copy = copy_image(p);
if(p.c == 3) rgbgr_image(copy);
int x,y,k;
char buff[256];
sprintf(buff, "%s.jpg", name);
char buff[256];
sprintf(buff, "%s.jpg", name);
IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c);
int step = disp->widthStep;
for(y = 0; y < p.h; ++y){
for(x = 0; x < p.w; ++x){
for(k= 0; k < p.c; ++k){
disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(copy,x,y,k)*255);
}
IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c);
int step = disp->widthStep;
for(y = 0; y < p.h; ++y){
for(x = 0; x < p.w; ++x){
for(k= 0; k < p.c; ++k){
disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(copy,x,y,k)*255);
}
}
cvSaveImage(buff, disp,0);
cvReleaseImage(&disp);
free_image(copy);
}
cvSaveImage(buff, disp,0);
cvReleaseImage(&disp);
free_image(copy);
}
#endif
void save_image(image im, const char *name)
{
#ifdef OPENCV
save_image_jpg(im, name);
#else
char buff[256];
//sprintf(buff, "%s (%d)", name, windows);
sprintf(buff, "%s.png", name);
unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
int i,k;
for(k = 0; k < im.c; ++k){
for(i = 0; i < im.w*im.h; ++i){
data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]);
void save_image(image im, const char *name)
{
#ifdef OPENCV
save_image_jpg(im, name);
#else
char buff[256];
//sprintf(buff, "%s (%d)", name, windows);
sprintf(buff, "%s.png", name);
unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
int i,k;
for(k = 0; k < im.c; ++k){
for(i = 0; i < im.w*im.h; ++i){
data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]);
}
}
int success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c);
free(data);
if(!success) fprintf(stderr, "Failed to write image %s\n", buff);
#endif
}
void show_image_layers(image p, char *name)
{
int i;
char buff[256];
for(i = 0; i < p.c; ++i){
sprintf(buff, "%s - Layer %d", name, i);
image layer = get_image_layer(p, i);
show_image(layer, buff);
free_image(layer);
}
}
void show_image_collapsed(image p, char *name)
{
image c = collapse_image_layers(p, 1);
show_image(c, name);
free_image(c);
}
image make_empty_image(int w, int h, int c)
{
image out;
out.data = 0;
out.h = h;
out.w = w;
out.c = c;
return out;
}
image make_image(int w, int h, int c)
{
image out = make_empty_image(w,h,c);
out.data = calloc(h*w*c, sizeof(float));
return out;
}
image make_random_image(int w, int h, int c)
{
image out = make_empty_image(w,h,c);
out.data = calloc(h*w*c, sizeof(float));
int i;
for(i = 0; i < w*h*c; ++i){
out.data[i] = (rand_normal() * .25) + .5;
}
return out;
}
image float_to_image(int w, int h, int c, float *data)
{
image out = make_empty_image(w,h,c);
out.data = data;
return out;
}
image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy)
{
int x, y, c;
float cx = im.w/2.;
float cy = im.h/2.;
image rot = make_image(w, h, im.c);
for(c = 0; c < im.c; ++c){
for(y = 0; y < h; ++y){
for(x = 0; x < w; ++x){
float rx = cos(rad)*(x/s + dx/s -cx) - sin(rad)*(y/s + dy/s -cy) + cx;
float ry = sin(rad)*(x/s + dx/s -cx) + cos(rad)*(y/s + dy/s -cy) + cy;
float val = bilinear_interpolate(im, rx, ry, c);
set_pixel(rot, x, y, c, val);
}
}
int success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c);
free(data);
if(!success) fprintf(stderr, "Failed to write image %s\n", buff);
#endif
}
return rot;
}
void show_image_layers(image p, char *name)
{
int i;
char buff[256];
for(i = 0; i < p.c; ++i){
sprintf(buff, "%s - Layer %d", name, i);
image layer = get_image_layer(p, i);
show_image(layer, buff);
free_image(layer);
image rotate_image(image im, float rad)
{
int x, y, c;
float cx = im.w/2.;
float cy = im.h/2.;
image rot = make_image(im.w, im.h, im.c);
for(c = 0; c < im.c; ++c){
for(y = 0; y < im.h; ++y){
for(x = 0; x < im.w; ++x){
float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx;
float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy;
float val = bilinear_interpolate(im, rx, ry, c);
set_pixel(rot, x, y, c, val);
}
}
}
return rot;
}
void show_image_collapsed(image p, char *name)
{
image c = collapse_image_layers(p, 1);
show_image(c, name);
free_image(c);
}
void translate_image(image m, float s)
{
int i;
for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s;
}
image make_empty_image(int w, int h, int c)
{
image out;
out.data = 0;
out.h = h;
out.w = w;
out.c = c;
return out;
}
void scale_image(image m, float s)
{
int i;
for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s;
}
image make_image(int w, int h, int c)
{
image out = make_empty_image(w,h,c);
out.data = calloc(h*w*c, sizeof(float));
return out;
}
image make_random_image(int w, int h, int c)
{
image out = make_empty_image(w,h,c);
out.data = calloc(h*w*c, sizeof(float));
int i;
for(i = 0; i < w*h*c; ++i){
out.data[i] = (rand_normal() * .25) + .5;
}
return out;
}
image float_to_image(int w, int h, int c, float *data)
{
image out = make_empty_image(w,h,c);
out.data = data;
return out;
}
image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy)
{
int x, y, c;
float cx = im.w/2.;
float cy = im.h/2.;
image rot = make_image(w, h, im.c);
for(c = 0; c < im.c; ++c){
for(y = 0; y < h; ++y){
for(x = 0; x < w; ++x){
float rx = cos(rad)*(x/s + dx/s -cx) - sin(rad)*(y/s + dy/s -cy) + cx;
float ry = sin(rad)*(x/s + dx/s -cx) + cos(rad)*(y/s + dy/s -cy) + cy;
float val = bilinear_interpolate(im, rx, ry, c);
set_pixel(rot, x, y, c, val);
image crop_image(image im, int dx, int dy, int w, int h)
{
image cropped = make_image(w, h, im.c);
int i, j, k;
for(k = 0; k < im.c; ++k){
for(j = 0; j < h; ++j){
for(i = 0; i < w; ++i){
int r = j + dy;
int c = i + dx;
float val = 0;
r = constrain_int(r, 0, im.h-1);
c = constrain_int(c, 0, im.w-1);
if (r >= 0 && r < im.h && c >= 0 && c < im.w) {
val = get_pixel(im, c, r, k);
}
set_pixel(cropped, i, j, k, val);
}
}
return rot;
}
image rotate_image(image im, float rad)
{
int x, y, c;
float cx = im.w/2.;
float cy = im.h/2.;
image rot = make_image(im.w, im.h, im.c);
for(c = 0; c < im.c; ++c){
for(y = 0; y < im.h; ++y){
for(x = 0; x < im.w; ++x){
float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx;
float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy;
float val = bilinear_interpolate(im, rx, ry, c);
set_pixel(rot, x, y, c, val);
}
}
}
return rot;
}
void translate_image(image m, float s)
{
int i;
for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s;
}
void scale_image(image m, float s)
{
int i;
for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s;
}
image crop_image(image im, int dx, int dy, int w, int h)
{
image cropped = make_image(w, h, im.c);
int i, j, k;
for(k = 0; k < im.c; ++k){
for(j = 0; j < h; ++j){
for(i = 0; i < w; ++i){
int r = j + dy;
int c = i + dx;
float val = 0;
r = constrain_int(r, 0, im.h-1);
c = constrain_int(c, 0, im.w-1);
if (r >= 0 && r < im.h && c >= 0 && c < im.w) {
val = get_pixel(im, c, r, k);
}
set_pixel(cropped, i, j, k, val);
}
}
}
return cropped;
}
return cropped;
}
int best_3d_shift_r(image a, image b, int min, int max)
{
@ -666,7 +686,7 @@ void rgb_to_hsv(image im)
v = max;
if(max == 0){
s = 0;
h = -1;
h = 0;
}else{
s = delta/max;
if(r == max){
@ -677,6 +697,7 @@ void rgb_to_hsv(image im)
h = 4 + (r - g) / delta;
}
if (h < 0) h += 6;
h = h/6.;
}
set_pixel(im, i, j, 0, h);
set_pixel(im, i, j, 1, s);
@ -694,7 +715,7 @@ void hsv_to_rgb(image im)
float f, p, q, t;
for(j = 0; j < im.h; ++j){
for(i = 0; i < im.w; ++i){
h = get_pixel(im, i , j, 0);
h = 6 * get_pixel(im, i , j, 0);
s = get_pixel(im, i , j, 1);
v = get_pixel(im, i , j, 2);
if (s == 0) {
@ -781,6 +802,18 @@ void scale_image_channel(image im, int c, float v)
}
}
/*
 * Add the constant v to every pixel of channel c, in place.
 * Pixels are visited row by row through get_pixel/set_pixel.
 */
void translate_image_channel(image im, int c, float v)
{
    int row, col;
    for(row = 0; row < im.h; ++row){
        for(col = 0; col < im.w; ++col){
            float shifted = get_pixel(im, col, row, c);
            shifted = shifted + v;
            set_pixel(im, col, row, c, shifted);
        }
    }
}
image binarize_image(image im)
{
image c = copy_image(im);
@ -800,6 +833,19 @@ void saturate_image(image im, float sat)
constrain_image(im);
}
/*
 * Shift the hue of an image in place.
 *
 * The image is converted to HSV, `hue` is added to each of the first
 * im.w*im.h floats of im.data (the channel rgb_to_hsv writes hue into),
 * and a single wrap-around step of 1 is applied when a value leaves [0, 1].
 * The image is then converted back to RGB and passed through constrain_image.
 */
void hue_image(image im, float hue)
{
    const int plane = im.w*im.h;
    float *h = im.data;
    int k;

    rgb_to_hsv(im);
    for(k = 0; k < plane; ++k){
        h[k] = h[k] + hue;
        if (h[k] > 1) h[k] -= 1;
        if (h[k] < 0) h[k] += 1;
    }
    hsv_to_rgb(im);
    constrain_image(im);
}
void exposure_image(image im, float sat)
{
rgb_to_hsv(im);
@ -808,6 +854,29 @@ void exposure_image(image im, float sat)
constrain_image(im);
}
/*
 * Apply a combined colour distortion to an image, in place.
 *
 *   hue - added to the first channel after conversion to HSV; one
 *         wrap-around step of 1 is applied when a value leaves [0, 1]
 *   sat - multiplier passed to scale_image_channel for channel 1
 *   val - multiplier passed to scale_image_channel for channel 2
 *
 * The image is converted to HSV once, adjusted, converted back to RGB and
 * finally passed through constrain_image.
 */
void distort_image(image im, float hue, float sat, float val)
{
    const int plane = im.w*im.h;
    float *h = im.data;
    int k;

    rgb_to_hsv(im);
    scale_image_channel(im, 1, sat);
    scale_image_channel(im, 2, val);
    for(k = 0; k < plane; ++k){
        h[k] = h[k] + hue;
        if (h[k] > 1) h[k] -= 1;
        if (h[k] < 0) h[k] += 1;
    }
    hsv_to_rgb(im);
    constrain_image(im);
}
/*
 * Distort an image in place with randomly drawn parameters.
 *
 * The hue shift comes from rand_uniform(-hue, hue); the saturation and
 * exposure factors come from rand_scale(). The three draws are made in the
 * order hue, saturation, exposure, so the random stream is consumed in a
 * fixed sequence before distort_image is called.
 */
void random_distort_image(image im, float hue, float saturation, float exposure)
{
    const float hue_shift = rand_uniform(-hue, hue);
    const float sat_factor = rand_scale(saturation);
    const float exp_factor = rand_scale(exposure);

    distort_image(im, hue_shift, sat_factor, exp_factor);
}
void saturate_exposure_image(image im, float sat, float exposure)
{
rgb_to_hsv(im);
@ -876,7 +945,6 @@ image resize_image(image im, int w, int h)
return resized;
}
#include "cuda.h"
void test_resize(char *filename)
{
@ -885,59 +953,40 @@ void test_resize(char *filename)
printf("L2 Norm: %f\n", mag);
image gray = grayscale_image(im);
image sat2 = copy_image(im);
saturate_image(sat2, 2);
image c1 = copy_image(im);
image c2 = copy_image(im);
image c3 = copy_image(im);
image c4 = copy_image(im);
distort_image(c1, .1, 1.5, 1.5);
distort_image(c2, -.1, .66666, .66666);
distort_image(c3, .1, 1.5, .66666);
distort_image(c4, .1, .66666, 1.5);
image sat5 = copy_image(im);
saturate_image(sat5, .5);
image exp2 = copy_image(im);
exposure_image(exp2, 2);
image exp5 = copy_image(im);
exposure_image(exp5, .5);
image bin = binarize_image(im);
/*
#ifdef GPU
image r = resize_image(im, im.w, im.h);
image black = make_image(im.w*2 + 3, im.h*2 + 3, 9);
image black2 = make_image(im.w, im.h, 3);
float *r_gpu = cuda_make_array(r.data, r.w*r.h*r.c);
float *black_gpu = cuda_make_array(black.data, black.w*black.h*black.c);
float *black2_gpu = cuda_make_array(black2.data, black2.w*black2.h*black2.c);
shortcut_gpu(3, r.w, r.h, 1, r_gpu, black.w, black.h, 3, black_gpu);
//flip_image(r);
//shortcut_gpu(3, r.w, r.h, 1, r.data, black.w, black.h, 3, black.data);
shortcut_gpu(3, black.w, black.h, 3, black_gpu, black2.w, black2.h, 1, black2_gpu);
cuda_pull_array(black_gpu, black.data, black.w*black.h*black.c);
cuda_pull_array(black2_gpu, black2.data, black2.w*black2.h*black2.c);
show_image_layers(black, "Black");
show_image(black2, "Recreate");
#endif
*/
image rot = rotate_crop_image(im, -.2618, 1, im.w/2, im.h/2, 0, 0);
image rot3 = rotate_crop_image(im, -.2618, 2, im.w, im.h, im.w/2, 0);
image rot2 = rotate_crop_image(im, -.2618, 1, im.w, im.h, 0, 0);
show_image(rot, "Rotated");
show_image(rot2, "base");
show_image(rot3, "Rotated2");
/*
show_image(im, "Original");
show_image(bin, "Binary");
show_image(gray, "Gray");
show_image(sat2, "Saturation-2");
show_image(sat5, "Saturation-.5");
show_image(exp2, "Exposure-2");
show_image(exp5, "Exposure-.5");
*/
show_image(c1, "C1");
show_image(c2, "C2");
show_image(c3, "C3");
show_image(c4, "C4");
#ifdef OPENCV
cvWaitKey(0);
while(1){
float exposure = 1.15;
float saturation = 1.15;
float hue = .05;
image c = copy_image(im);
float dexp = rand_scale(exposure);
float dsat = rand_scale(saturation);
float dhue = rand_uniform(-hue, hue);
distort_image(c, dhue, dsat, dexp);
show_image(c, "rand");
printf("%f %f %f\n", dhue, dsat, dexp);
free_image(c);
cvWaitKey(0);
}
#endif
}
@ -1180,10 +1229,8 @@ void show_images(image *ims, int n, char *window)
image sized = resize_image(m, w, h);
*/
normalize_image(m);
image sized = resize_image(m, m.w, m.h);
save_image(sized, window);
show_image(sized, window);
free_image(sized);
save_image(m, window);
show_image(m, window);
free_image(m);
}

View File

@ -32,6 +32,7 @@ void scale_image(image m, float s);
image crop_image(image im, int dx, int dy, int w, int h);
image random_crop_image(image im, int w, int h);
image random_augment_image(image im, float angle, int low, int high, int size);
void random_distort_image(image im, float hue, float saturation, float exposure);
image resize_image(image im, int w, int h);
image resize_min(image im, int min);
void translate_image(image m, float s);
@ -41,6 +42,7 @@ void rotate_image_cw(image im, int times);
void embed_image(image source, image dest, int dx, int dy);
void saturate_image(image im, float sat);
void exposure_image(image im, float sat);
void distort_image(image im, float hue, float sat, float val);
void saturate_exposure_image(image im, float sat, float exposure);
void hsv_to_rgb(image im);
void rgbgr_image(image im);

View File

@ -1,237 +0,0 @@
#include "network.h"
#include "utils.h"
#include "parser.h"
#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
#endif
/*
 * Train a classifier on the ImageNet lists under data/.
 *
 *   cfgfile    - network configuration to parse
 *   weightfile - optional weights to load first (may be NULL)
 *
 * Batches of 1024 images are loaded on a background thread while the
 * previous batch is being trained on. Weights are written to the backup
 * directory at every epoch boundary, to a rolling ".backup" file whenever
 * *net.seen is a multiple of 1000, and once more when training finishes.
 *
 * Checkpoint paths are built with snprintf so an unusually long config
 * base name cannot overflow the 256-byte path buffers.
 */
void train_imagenet(char *cfgfile, char *weightfile)
{
    /* time() must be called before the local `clock_t time` below shadows it. */
    data_seed = time(0);
    srand(time(0));
    float avg_loss = -1;
    char *base = basecfg(cfgfile);
    char *backup_directory = "/home/pjreddie/backup/";
    printf("%s\n", base);
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
    int imgs = 1024;
    char **labels = get_labels("data/inet.labels.list");
    list *plist = get_paths("data/inet.train.list");
    char **paths = (char **)list_to_array(plist);
    printf("%d\n", plist->size);
    int N = plist->size;
    clock_t time;
    pthread_t load_thread;
    data train;
    data buffer;

    load_args args = {0};
    args.w = net.w;
    args.h = net.h;
    args.paths = paths;
    args.classes = 1000;
    args.n = imgs;
    args.m = N;
    args.labels = labels;
    args.d = &buffer;
    args.type = OLD_CLASSIFICATION_DATA;

    /* Kick off the first load; each iteration joins it and starts the next. */
    load_thread = load_data_in_thread(args);
    int epoch = (*net.seen)/N;
    while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
        time=clock();
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data_in_thread(args);
        printf("Loaded: %lf seconds\n", sec(clock()-time));
        time=clock();
        float loss = train_network(net, train);
        /* -1 marks "no loss seen yet": seed the running average with the first loss. */
        if(avg_loss == -1) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;
        printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
        free_data(train);
        if(*net.seen/N > epoch){
            epoch = *net.seen/N;
            char buff[256];
            snprintf(buff, sizeof buff, "%s/%s_%d.weights",backup_directory,base, epoch);
            save_weights(net, buff);
        }
        if(*net.seen%1000 == 0){
            char buff[256];
            snprintf(buff, sizeof buff, "%s/%s.backup",backup_directory,base);
            save_weights(net, buff);
        }
    }
    char buff[256];
    snprintf(buff, sizeof buff, "%s/%s.weights", backup_directory, base);
    save_weights(net, buff);

    /* One load is always still in flight when the loop exits; collect and drop it. */
    pthread_join(load_thread, 0);
    free_data(buffer);
    free_network(net);
    free_ptrs((void**)labels, 1000);
    free_ptrs((void**)paths, plist->size);
    free_list(plist);
    free(base);
}
/*
 * Evaluate a classifier on data/inet.val.list and print running top-1 and
 * top-5 accuracy.
 *
 *   filename   - network configuration to parse
 *   weightfile - optional weights to load first (may be NULL)
 *
 * The validation list is processed in 50 consecutive splits. While one
 * split is being scored, the next one is loaded on a background thread.
 *
 * args.n is refreshed for every queued split: when the list length is not a
 * multiple of the split count the splits differ in size, and reusing the
 * size of split 0 would load the wrong number of images.
 * The label table, the path array and the network are released at the end.
 */
void validate_imagenet(char *filename, char *weightfile)
{
    int i = 0;
    network net = parse_network_cfg(filename);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    /* time() must be called before the local `clock_t time` below shadows it. */
    srand(time(0));

    char **labels = get_labels("data/inet.labels.list");
    //list *plist = get_paths("data/inet.suppress.list");
    list *plist = get_paths("data/inet.val.list");

    char **paths = (char **)list_to_array(plist);
    int m = plist->size;
    free_list(plist);

    clock_t time;
    float avg_acc = 0;
    float avg_top5 = 0;
    int splits = 50;
    /* Size of split 0 (i is still 0 here). */
    int num = (i+1)*m/splits - i*m/splits;

    data val, buffer;

    load_args args = {0};
    args.w = net.w;
    args.h = net.h;
    args.paths = paths;
    args.classes = 1000;
    args.n = num;
    args.m = 0;
    args.labels = labels;
    args.d = &buffer;
    args.type = OLD_CLASSIFICATION_DATA;

    pthread_t load_thread = load_data_in_thread(args);
    for(i = 1; i <= splits; ++i){
        time=clock();

        /* Collect split i-1, then queue split i (if any) before scoring. */
        pthread_join(load_thread, 0);
        val = buffer;

        num = (i+1)*m/splits - i*m/splits;
        char **part = paths+(i*m/splits);
        if(i != splits){
            args.paths = part;
            args.n = num;
            load_thread = load_data_in_thread(args);
        }
        printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time));

        time=clock();
        float *acc = network_accuracies(net, val, 5);
        avg_acc += acc[0];
        avg_top5 += acc[1];
        printf("%d: top1: %f, top5: %f, %lf seconds, %d images\n", i, avg_acc/i, avg_top5/i, sec(clock()-time), val.X.rows);
        free_data(val);
    }

    /* No loader thread is outstanding here: the last iteration does not queue one. */
    free_network(net);
    free_ptrs((void**)labels, 1000);
    free_ptrs((void**)paths, m);
}
/*
 * Classify images and print the ten highest-scoring classes for each.
 *
 *   cfgfile    - network configuration to parse
 *   weightfile - optional weights to load first (may be NULL)
 *   filename   - image to classify; when NULL, image paths are read from
 *                stdin one per line until end of input
 *
 * The path is copied with snprintf so it is always NUL-terminated, even
 * when `filename` is 256 characters or longer (strncpy gives no such
 * guarantee). The trailing newline of an interactive line is removed with
 * strcspn, which also handles a line consisting of only a newline.
 */
void test_imagenet(char *cfgfile, char *weightfile, char *filename)
{
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);
    srand(2222222);
    int i = 0;
    char **names = get_labels("data/shortnames.txt");
    clock_t time;
    int indexes[10];
    char buff[256];
    char *input = buff;
    while(1){
        if(filename){
            snprintf(buff, sizeof buff, "%s", filename);
        }else{
            printf("Enter Image Path: ");
            fflush(stdout);
            input = fgets(input, 256, stdin);
            if(!input) return;
            input[strcspn(input, "\n")] = '\0';
        }
        image im = load_image_color(input, 256, 256);
        float *X = im.data;
        time=clock();
        float *predictions = network_predict(net, X);
        top_predictions(net, 10, indexes);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        for(i = 0; i < 10; ++i){
            int index = indexes[i];
            printf("%s: %f\n", names[index], predictions[index]);
        }
        free_image(im);
        /* A file given on the command line is classified exactly once. */
        if (filename) break;
    }
}
/*
 * Command-line entry point for the imagenet sub-command.
 *
 * Expected arguments: argv[2] = "train" | "test" | "valid", argv[3] = cfg,
 * argv[4] = weights (optional), argv[5] = image file (optional, used by
 * "test"). With fewer than four arguments a usage line is printed to
 * stderr. Any other value of argv[2] does nothing.
 */
void run_imagenet(int argc, char **argv)
{
    if(argc < 4){
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }

    char *mode = argv[2];
    char *cfg = argv[3];
    char *weights = 0;
    char *filename = 0;
    if(argc > 4) weights = argv[4];
    if(argc > 5) filename = argv[5];

    if(strcmp(mode, "test") == 0){
        test_imagenet(cfg, weights, filename);
        return;
    }
    if(strcmp(mode, "train") == 0){
        train_imagenet(cfg, weights);
        return;
    }
    if(strcmp(mode, "valid") == 0){
        validate_imagenet(cfg, weights);
    }
}
/*
void train_imagenet_distributed(char *address)
{
float avg_loss = 1;
srand(time(0));
network net = parse_network_cfg("cfg/net.cfg");
set_learning_network(&net, 0, 1, 0);
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
int imgs = net.batch;
int i = 0;
char **labels = get_labels("/home/pjreddie/data/imagenet/cls.labels.list");
list *plist = get_paths("/data/imagenet/cls.train.list");
char **paths = (char **)list_to_array(plist);
printf("%d\n", plist->size);
clock_t time;
data train, buffer;
pthread_t load_thread = load_data_thread(paths, imgs, plist->size, labels, 1000, 224, 224, &buffer);
while(1){
i += 1;
time=clock();
client_update(net, address);
printf("Updated: %lf seconds\n", sec(clock()-time));
time=clock();
pthread_join(load_thread, 0);
train = buffer;
normalize_data_rows(train);
load_thread = load_data_thread(paths, imgs, plist->size, labels, 1000, 224, 224, &buffer);
printf("Loaded: %lf seconds\n", sec(clock()-time));
time=clock();
float loss = train_network(net, train);
avg_loss = avg_loss*.9 + loss*.1;
printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), i*imgs);
free_data(train);
}
}
*/

View File

@ -72,6 +72,7 @@ struct layer{
float saturation;
float exposure;
float shift;
float ratio;
int softmax;
int classes;
int coords;
@ -82,6 +83,7 @@ struct layer{
int joint;
int noadjust;
int reorg;
int log;
float alpha;
float beta;

View File

@ -18,7 +18,7 @@ image get_maxpool_delta(maxpool_layer l)
return float_to_image(w,h,c,l.delta);
}
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride)
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding)
{
fprintf(stderr, "Maxpool Layer: %d x %d x %d image, %d size, %d stride\n", h,w,c,size,stride);
maxpool_layer l = {0};
@ -27,8 +27,9 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
l.h = h;
l.w = w;
l.c = c;
l.out_w = (w-1)/stride + 1;
l.out_h = (h-1)/stride + 1;
l.pad = padding;
l.out_w = (w + 2*padding - size + 1)/stride + 1;
l.out_h = (h + 2*padding - size + 1)/stride + 1;
l.out_c = c;
l.outputs = l.out_h * l.out_w * l.out_c;
l.inputs = h*w*c;
@ -48,13 +49,12 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
void resize_maxpool_layer(maxpool_layer *l, int w, int h)
{
int stride = l->stride;
l->h = h;
l->w = w;
l->inputs = h*w*l->c;
l->out_w = (w-1)/stride + 1;
l->out_h = (h-1)/stride + 1;
l->out_w = (w + 2*l->pad - l->size + 1)/l->stride + 1;
l->out_h = (h + 2*l->pad - l->size + 1)/l->stride + 1;
l->outputs = l->out_w * l->out_h * l->c;
int output_size = l->outputs * l->batch;
@ -75,11 +75,11 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
void forward_maxpool_layer(const maxpool_layer l, network_state state)
{
int b,i,j,k,m,n;
int w_offset = (-l.size-1)/2 + 1;
int h_offset = (-l.size-1)/2 + 1;
int w_offset = -l.pad;
int h_offset = -l.pad;
int h = (l.h-1)/l.stride + 1;
int w = (l.w-1)/l.stride + 1;
int h = l.out_h;
int w = l.out_w;
int c = l.c;
for(b = 0; b < l.batch; ++b){
@ -112,8 +112,8 @@ void forward_maxpool_layer(const maxpool_layer l, network_state state)
void backward_maxpool_layer(const maxpool_layer l, network_state state)
{
int i;
int h = (l.h-1)/l.stride + 1;
int w = (l.w-1)/l.stride + 1;
int h = l.out_h;
int w = l.out_w;
int c = l.c;
for(i = 0; i < h*w*c*l.batch; ++i){
int index = l.indexes[i];

View File

@ -9,7 +9,7 @@
typedef layer maxpool_layer;
image get_maxpool_image(maxpool_layer l);
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride);
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding);
void resize_maxpool_layer(maxpool_layer *l, int w, int h);
void forward_maxpool_layer(const maxpool_layer l, network_state state);
void backward_maxpool_layer(const maxpool_layer l, network_state state);

View File

@ -7,10 +7,10 @@ extern "C" {
#include "cuda.h"
}
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, float *input, float *output, int *indexes)
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
{
int h = (in_h-1)/stride + 1;
int w = (in_w-1)/stride + 1;
int h = (in_h + 2*pad - size + 1)/stride + 1;
int w = (in_w + 2*pad - size + 1)/stride + 1;
int c = in_c;
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
@ -24,8 +24,8 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
id /= c;
int b = id;
int w_offset = (-size-1)/2 + 1;
int h_offset = (-size-1)/2 + 1;
int w_offset = -pad;
int h_offset = -pad;
int out_index = j + w*(i + h*(k + c*b));
float max = -INFINITY;
@ -47,10 +47,10 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
indexes[out_index] = max_i;
}
__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, float *delta, float *prev_delta, int *indexes)
__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes)
{
int h = (in_h-1)/stride + 1;
int w = (in_w-1)/stride + 1;
int h = (in_h + 2*pad - size + 1)/stride + 1;
int w = (in_w + 2*pad - size + 1)/stride + 1;
int c = in_c;
int area = (size-1)/stride;
@ -66,8 +66,8 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_
id /= in_c;
int b = id;
int w_offset = (-size-1)/2 + 1;
int h_offset = (-size-1)/2 + 1;
int w_offset = -pad;
int h_offset = -pad;
float d = 0;
int l, m;
@ -86,13 +86,13 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_
extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
{
int h = (layer.h-1)/layer.stride + 1;
int w = (layer.w-1)/layer.stride + 1;
int h = layer.out_h;
int w = layer.out_w;
int c = layer.c;
size_t n = h*w*c*layer.batch;
forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, state.input, layer.output_gpu, layer.indexes_gpu);
forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu);
check_error(cudaPeekAtLastError());
}
@ -100,7 +100,7 @@ extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state st
{
size_t n = layer.h*layer.w*layer.c*layer.batch;
backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.delta_gpu, state.delta, layer.indexes_gpu);
backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu);
check_error(cudaPeekAtLastError());
}

View File

@ -420,7 +420,7 @@ int resize_network(network *net, int w, int h)
net->h = h;
int inputs = 0;
size_t workspace_size = 0;
//fprintf(stderr, "Resizing to %d x %d...", w, h);
//fprintf(stderr, "Resizing to %d x %d...\n", w, h);
//fflush(stderr);
for (i = 0; i < net->n; ++i){
layer l = net->layers[i];

View File

@ -43,6 +43,7 @@ typedef struct network{
float angle;
float exposure;
float saturation;
float hue;
int gpu_index;

View File

@ -2,6 +2,7 @@
#include <string.h>
#include <stdlib.h>
#include "blas.h"
#include "parser.h"
#include "assert.h"
#include "activations.h"
@ -147,7 +148,10 @@ convolutional_layer parse_convolutional(list *options, size_params params)
int n = option_find_int(options, "filters",1);
int size = option_find_int(options, "size",1);
int stride = option_find_int(options, "stride",1);
int pad = option_find_int(options, "pad",0);
int pad = option_find_int_quiet(options, "pad",0);
int padding = option_find_int_quiet(options, "padding",0);
if(pad) padding = size/2;
char *activation_s = option_find_str(options, "activation", "logistic");
ACTIVATION activation = get_activation(activation_s);
@ -161,7 +165,7 @@ convolutional_layer parse_convolutional(list *options, size_params params)
int binary = option_find_int_quiet(options, "binary", 0);
int xnor = option_find_int_quiet(options, "xnor", 0);
convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,pad,activation, batch_normalize, binary, xnor);
convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,padding,activation, batch_normalize, binary, xnor);
layer.flipped = option_find_int_quiet(options, "flipped", 0);
layer.dot = option_find_float_quiet(options, "dot", 0);
@ -234,9 +238,16 @@ layer parse_region(list *options, size_params params)
int coords = option_find_int(options, "coords", 4);
int classes = option_find_int(options, "classes", 20);
int num = option_find_int(options, "num", 1);
params.w = option_find_int(options, "side", params.w);
params.h = option_find_int(options, "side", params.h);
layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords);
assert(l.outputs == params.inputs);
l.log = option_find_int_quiet(options, "log", 0);
l.sqrt = option_find_int_quiet(options, "sqrt", 0);
l.softmax = option_find_int(options, "softmax", 0);
l.max_boxes = option_find_int_quiet(options, "max",30);
l.jitter = option_find_float(options, "jitter", .2);
@ -278,6 +289,7 @@ cost_layer parse_cost(list *options, size_params params)
COST_TYPE type = get_cost_type(type_s);
float scale = option_find_float_quiet(options, "scale",1);
cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale);
layer.ratio = option_find_float_quiet(options, "ratio",0);
return layer;
}
@ -324,6 +336,7 @@ maxpool_layer parse_maxpool(list *options, size_params params)
{
int stride = option_find_int(options, "stride",1);
int size = option_find_int(options, "size",stride);
int padding = option_find_int_quiet(options, "padding", (size-1)/2);
int batch,h,w,c;
h = params.h;
@ -332,7 +345,7 @@ maxpool_layer parse_maxpool(list *options, size_params params)
batch=params.batch;
if(!(h && w && c)) error("Layer before maxpool layer must output image.");
maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride);
maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding);
return layer;
}
@ -486,6 +499,7 @@ void parse_net_options(list *options, network *net)
net->angle = option_find_float_quiet(options, "angle", 0);
net->saturation = option_find_float_quiet(options, "saturation", 1);
net->exposure = option_find_float_quiet(options, "exposure", 1);
net->hue = option_find_float_quiet(options, "hue", 0);
if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");
@ -1085,6 +1099,7 @@ void load_convolutional_weights(layer l, FILE *fp)
fread(l.rolling_variance, sizeof(float), l.n, fp);
}
fread(l.filters, sizeof(float), num, fp);
//if(l.c == 3) scal_cpu(num, 1./256, l.filters, 1);
if (l.flipped) {
transpose_matrix(l.filters, l.c*l.size*l.size, l.n);
}

View File

@ -80,8 +80,8 @@ box get_region_box(float *x, int index, int i, int j, int w, int h, int adjust,
b.w = logistic_activate(x[index + 2]);
b.h = logistic_activate(x[index + 3]);
}
if(adjust && b.w < .01) b.w = .01;
if(adjust && b.h < .01) b.h = .01;
//if(adjust && b.w < .01) b.w = .01;
//if(adjust && b.h < .01) b.h = .01;
return b;
}
@ -149,6 +149,7 @@ void forward_region_layer(const region_layer l, network_state state)
l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
if(best_iou > .5) l.delta[index + 4] = 0;
/*
if(*(state.net.seen) < 6400){
box truth = {0};
truth.x = (i + .5)/l.w;
@ -157,6 +158,7 @@ void forward_region_layer(const region_layer l, network_state state)
truth.h = .5;
delta_region_box(truth, l.output, index, i, j, l.w, l.h, l.delta, LOG, 1);
}
*/
}
}
}

View File

@ -45,6 +45,11 @@ void train_tag(char *cfgfile, char *weightfile, int clear)
args.d = &buffer;
args.type = TAG_DATA;
args.angle = net.angle;
args.exposure = net.exposure;
args.saturation = net.saturation;
args.hue = net.hue;
fprintf(stderr, "%d classes\n", net.outputs);
load_thread = load_data_in_thread(args);

View File

@ -585,6 +585,13 @@ float rand_uniform(float min, float max)
return ((float)rand()/RAND_MAX * (max - min)) + min;
}
// Produce a random multiplicative scale factor.
// A value is sampled with rand_uniform(1, s); a subsequent rand() draw then
// selects between returning that value unchanged (odd draw) or returning
// its reciprocal (even draw).
float rand_scale(float s)
{
    const float factor = rand_uniform(1, s);
    // This draw must come after the uniform sample so the sequence of
    // rand() calls stays the same.
    const int keep_as_is = rand() % 2;
    if (!keep_as_is) {
        return 1. / factor;
    }
    return factor;
}
float **one_hot_encode(float *a, int n, int k)
{
int i;

View File

@ -42,6 +42,7 @@ float mse_array(float *a, int n);
float rand_normal();
size_t rand_size_t();
float rand_uniform(float min, float max);
float rand_scale(float s);
int rand_int(int min, int max);
float sum_array(float *a, int n);
float mean_array(float *a, int n);

View File

@ -9,37 +9,37 @@
void extract_voxel(char *lfile, char *rfile, char *prefix)
{
#ifdef OPENCV
int w = 1920;
int h = 1080;
#ifdef OPENCV
int shift = 0;
int count = 0;
CvCapture *lcap = cvCaptureFromFile(lfile);
CvCapture *rcap = cvCaptureFromFile(rfile);
while(1){
image l = get_image_from_stream(lcap);
image r = get_image_from_stream(rcap);
if(!l.w || !r.w) break;
if(count%100 == 0) {
shift = best_3d_shift_r(l, r, -l.h/100, l.h/100);
printf("%d\n", shift);
}
image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h);
image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + shift, w, h);
char buff[256];
sprintf(buff, "%s_%05d_l", prefix, count);
save_image(ls, buff);
sprintf(buff, "%s_%05d_r", prefix, count);
save_image(rs, buff);
free_image(l);
free_image(r);
free_image(ls);
free_image(rs);
++count;
int shift = 0;
int count = 0;
CvCapture *lcap = cvCaptureFromFile(lfile);
CvCapture *rcap = cvCaptureFromFile(rfile);
while(1){
image l = get_image_from_stream(lcap);
image r = get_image_from_stream(rcap);
if(!l.w || !r.w) break;
if(count%100 == 0) {
shift = best_3d_shift_r(l, r, -l.h/100, l.h/100);
printf("%d\n", shift);
}
image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h);
image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + shift, w, h);
char buff[256];
sprintf(buff, "%s_%05d_l", prefix, count);
save_image(ls, buff);
sprintf(buff, "%s_%05d_r", prefix, count);
save_image(rs, buff);
free_image(l);
free_image(r);
free_image(ls);
free_image(rs);
++count;
}
#else
printf("need OpenCV for extraction\n");
printf("need OpenCV for extraction\n");
#endif
}
@ -164,6 +164,6 @@ void run_voxel(int argc, char **argv)
else if(0==strcmp(argv[2], "test")) test_voxel(cfg, weights, filename);
else if(0==strcmp(argv[2], "extract")) extract_voxel(argv[3], argv[4], argv[5]);
/*
else if(0==strcmp(argv[2], "valid")) validate_voxel(cfg, weights);
*/
else if(0==strcmp(argv[2], "valid")) validate_voxel(cfg, weights);
*/
}

View File

@ -54,6 +54,11 @@ void train_yolo(char *cfgfile, char *weightfile)
args.d = &buffer;
args.type = REGION_DATA;
args.angle = net.angle;
args.exposure = net.exposure;
args.saturation = net.saturation;
args.hue = net.hue;
pthread_t load_thread = load_data_in_thread(args);
clock_t time;
//while(i*imgs < N*120){