mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Some changes
This commit is contained in:
parent
b394337824
commit
0305fb4d99
2
Makefile
2
Makefile
@ -36,7 +36,7 @@ endif
|
|||||||
|
|
||||||
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o
|
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o
|
||||||
ifeq ($(GPU), 1)
|
ifeq ($(GPU), 1)
|
||||||
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o yolo_kernels.o
|
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o yolo_kernels.o coco_kernels.o
|
||||||
endif
|
endif
|
||||||
|
|
||||||
OBJS = $(addprefix $(OBJDIR), $(OBJ))
|
OBJS = $(addprefix $(OBJDIR), $(OBJ))
|
||||||
|
240
cfg/yolo-coco.cfg
Normal file
240
cfg/yolo-coco.cfg
Normal file
@ -0,0 +1,240 @@
|
|||||||
|
[net]
|
||||||
|
batch=64
|
||||||
|
subdivisions=4
|
||||||
|
height=448
|
||||||
|
width=448
|
||||||
|
channels=3
|
||||||
|
momentum=0.9
|
||||||
|
decay=0.0005
|
||||||
|
|
||||||
|
learning_rate=0.0001
|
||||||
|
policy=steps
|
||||||
|
steps=100,200,300,100000,150000
|
||||||
|
scales=2.5,2,2,.1,.1
|
||||||
|
max_batches = 300000
|
||||||
|
|
||||||
|
[crop]
|
||||||
|
crop_width=448
|
||||||
|
crop_height=448
|
||||||
|
flip=0
|
||||||
|
angle=0
|
||||||
|
saturation = 1.5
|
||||||
|
exposure = 1.5
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=64
|
||||||
|
size=7
|
||||||
|
stride=2
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[maxpool]
|
||||||
|
size=2
|
||||||
|
stride=2
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=192
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[maxpool]
|
||||||
|
size=2
|
||||||
|
stride=2
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=128
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=256
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[maxpool]
|
||||||
|
size=2
|
||||||
|
stride=2
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=512
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=1024
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[maxpool]
|
||||||
|
size=2
|
||||||
|
stride=2
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=512
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=1024
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=512
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
filters=1024
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
|
||||||
|
#################################
|
||||||
|
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=1024
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
size=3
|
||||||
|
stride=2
|
||||||
|
pad=1
|
||||||
|
filters=1024
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=1024
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=1024
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[local]
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=192
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[dropout]
|
||||||
|
probability=.5
|
||||||
|
|
||||||
|
[connected]
|
||||||
|
output= 4410
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[detection]
|
||||||
|
classes=80
|
||||||
|
coords=4
|
||||||
|
rescore=1
|
||||||
|
side=7
|
||||||
|
num=2
|
||||||
|
softmax=0
|
||||||
|
sqrt=1
|
||||||
|
jitter=.2
|
||||||
|
|
||||||
|
object_scale=1
|
||||||
|
noobject_scale=.5
|
||||||
|
class_scale=1
|
||||||
|
coord_scale=5
|
||||||
|
|
@ -10,6 +10,7 @@ extern "C" {
|
|||||||
__device__ float linear_activate_kernel(float x){return x;}
|
__device__ float linear_activate_kernel(float x){return x;}
|
||||||
__device__ float logistic_activate_kernel(float x){return 1./(1. + exp(-x));}
|
__device__ float logistic_activate_kernel(float x){return 1./(1. + exp(-x));}
|
||||||
__device__ float relu_activate_kernel(float x){return x*(x>0);}
|
__device__ float relu_activate_kernel(float x){return x*(x>0);}
|
||||||
|
// ELU activation: returns x for x >= 0 and exp(x) - 1 otherwise.
// The branch is chosen with a ternary (as leaky_activate_kernel does) instead of
// blending both branches arithmetically: in the blended form exp(x) is evaluated
// for every x, and once it overflows to +inf the product 0 * inf makes the whole
// result NaN. The blended form also yields NaN for x = -inf (0 * -inf).
__device__ float elu_activate_kernel(float x){return (x >= 0) ? x : exp(x)-1;}
|
||||||
__device__ float relie_activate_kernel(float x){return x*(x>0);}
|
__device__ float relie_activate_kernel(float x){return x*(x>0);}
|
||||||
__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;}
|
__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;}
|
||||||
__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1*x;}
|
__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1*x;}
|
||||||
@ -24,6 +25,7 @@ __device__ float plse_activate_kernel(float x)
|
|||||||
__device__ float linear_gradient_kernel(float x){return 1;}
|
__device__ float linear_gradient_kernel(float x){return 1;}
|
||||||
__device__ float logistic_gradient_kernel(float x){return (1-x)*x;}
|
__device__ float logistic_gradient_kernel(float x){return (1-x)*x;}
|
||||||
__device__ float relu_gradient_kernel(float x){return (x>0);}
|
__device__ float relu_gradient_kernel(float x){return (x>0);}
|
||||||
|
// ELU gradient: 1 for x >= 0, x + 1 for x < 0.
// NOTE(review): presumably x is the activation *output* (as in
// logistic_gradient_kernel's (1-x)*x form), in which case x + 1 equals
// exp(input) on the negative side - confirm against the callers.
__device__ float elu_gradient_kernel(float x)
{
    const int nonneg = (x >= 0);
    const int neg = (x < 0);
    return nonneg + neg*(x + 1);
}
|
||||||
__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01;}
|
__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01;}
|
||||||
__device__ float ramp_gradient_kernel(float x){return (x>0)+.1;}
|
__device__ float ramp_gradient_kernel(float x){return (x>0)+.1;}
|
||||||
__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1;}
|
__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1;}
|
||||||
@ -39,6 +41,8 @@ __device__ float activate_kernel(float x, ACTIVATION a)
|
|||||||
return logistic_activate_kernel(x);
|
return logistic_activate_kernel(x);
|
||||||
case RELU:
|
case RELU:
|
||||||
return relu_activate_kernel(x);
|
return relu_activate_kernel(x);
|
||||||
|
case ELU:
|
||||||
|
return elu_activate_kernel(x);
|
||||||
case RELIE:
|
case RELIE:
|
||||||
return relie_activate_kernel(x);
|
return relie_activate_kernel(x);
|
||||||
case RAMP:
|
case RAMP:
|
||||||
@ -62,6 +66,8 @@ __device__ float gradient_kernel(float x, ACTIVATION a)
|
|||||||
return logistic_gradient_kernel(x);
|
return logistic_gradient_kernel(x);
|
||||||
case RELU:
|
case RELU:
|
||||||
return relu_gradient_kernel(x);
|
return relu_gradient_kernel(x);
|
||||||
|
case ELU:
|
||||||
|
return elu_gradient_kernel(x);
|
||||||
case RELIE:
|
case RELIE:
|
||||||
return relie_gradient_kernel(x);
|
return relie_gradient_kernel(x);
|
||||||
case RAMP:
|
case RAMP:
|
||||||
|
@ -12,6 +12,8 @@ char *get_activation_string(ACTIVATION a)
|
|||||||
return "logistic";
|
return "logistic";
|
||||||
case RELU:
|
case RELU:
|
||||||
return "relu";
|
return "relu";
|
||||||
|
case ELU:
|
||||||
|
return "elu";
|
||||||
case RELIE:
|
case RELIE:
|
||||||
return "relie";
|
return "relie";
|
||||||
case RAMP:
|
case RAMP:
|
||||||
@ -34,6 +36,7 @@ ACTIVATION get_activation(char *s)
|
|||||||
{
|
{
|
||||||
if (strcmp(s, "logistic")==0) return LOGISTIC;
|
if (strcmp(s, "logistic")==0) return LOGISTIC;
|
||||||
if (strcmp(s, "relu")==0) return RELU;
|
if (strcmp(s, "relu")==0) return RELU;
|
||||||
|
if (strcmp(s, "elu")==0) return ELU;
|
||||||
if (strcmp(s, "relie")==0) return RELIE;
|
if (strcmp(s, "relie")==0) return RELIE;
|
||||||
if (strcmp(s, "plse")==0) return PLSE;
|
if (strcmp(s, "plse")==0) return PLSE;
|
||||||
if (strcmp(s, "linear")==0) return LINEAR;
|
if (strcmp(s, "linear")==0) return LINEAR;
|
||||||
@ -53,6 +56,8 @@ float activate(float x, ACTIVATION a)
|
|||||||
return logistic_activate(x);
|
return logistic_activate(x);
|
||||||
case RELU:
|
case RELU:
|
||||||
return relu_activate(x);
|
return relu_activate(x);
|
||||||
|
case ELU:
|
||||||
|
return elu_activate(x);
|
||||||
case RELIE:
|
case RELIE:
|
||||||
return relie_activate(x);
|
return relie_activate(x);
|
||||||
case RAMP:
|
case RAMP:
|
||||||
@ -84,6 +89,8 @@ float gradient(float x, ACTIVATION a)
|
|||||||
return logistic_gradient(x);
|
return logistic_gradient(x);
|
||||||
case RELU:
|
case RELU:
|
||||||
return relu_gradient(x);
|
return relu_gradient(x);
|
||||||
|
case ELU:
|
||||||
|
return elu_gradient(x);
|
||||||
case RELIE:
|
case RELIE:
|
||||||
return relie_gradient(x);
|
return relie_gradient(x);
|
||||||
case RAMP:
|
case RAMP:
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
#include "math.h"
|
#include "math.h"
|
||||||
|
|
||||||
typedef enum{
|
typedef enum{
|
||||||
LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY
|
LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU
|
||||||
}ACTIVATION;
|
}ACTIVATION;
|
||||||
|
|
||||||
ACTIVATION get_activation(char *s);
|
ACTIVATION get_activation(char *s);
|
||||||
@ -22,6 +22,7 @@ void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta);
|
|||||||
static inline float linear_activate(float x){return x;}
|
static inline float linear_activate(float x){return x;}
|
||||||
static inline float logistic_activate(float x){return 1./(1. + exp(-x));}
|
static inline float logistic_activate(float x){return 1./(1. + exp(-x));}
|
||||||
static inline float relu_activate(float x){return x*(x>0);}
|
static inline float relu_activate(float x){return x*(x>0);}
|
||||||
|
static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
|
||||||
static inline float relie_activate(float x){return x*(x>0);}
|
static inline float relie_activate(float x){return x*(x>0);}
|
||||||
static inline float ramp_activate(float x){return x*(x>0)+.1*x;}
|
static inline float ramp_activate(float x){return x*(x>0)+.1*x;}
|
||||||
static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}
|
static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}
|
||||||
@ -36,6 +37,7 @@ static inline float plse_activate(float x)
|
|||||||
static inline float linear_gradient(float x){return 1;}
|
static inline float linear_gradient(float x){return 1;}
|
||||||
static inline float logistic_gradient(float x){return (1-x)*x;}
|
static inline float logistic_gradient(float x){return (1-x)*x;}
|
||||||
static inline float relu_gradient(float x){return (x>0);}
|
static inline float relu_gradient(float x){return (x>0);}
|
||||||
|
// ELU gradient: 1 for x >= 0, x + 1 for x < 0.
// NOTE(review): presumably x is the activation *output* (as in
// logistic_gradient's (1-x)*x form), in which case x + 1 equals exp(input)
// on the negative side - confirm against the callers.
static inline float elu_gradient(float x)
{
    const int nonneg = (x >= 0);
    const int neg = (x < 0);
    return nonneg + neg*(x + 1);
}
|
||||||
static inline float relie_gradient(float x){return (x>0) ? 1 : .01;}
|
static inline float relie_gradient(float x){return (x>0) ? 1 : .01;}
|
||||||
static inline float ramp_gradient(float x){return (x>0)+.1;}
|
static inline float ramp_gradient(float x){return (x>0)+.1;}
|
||||||
static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;}
|
static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;}
|
||||||
|
48
src/coco.c
48
src/coco.c
@ -15,30 +15,7 @@ char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","
|
|||||||
|
|
||||||
int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90};
|
int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90};
|
||||||
|
|
||||||
void draw_coco(image im, int num, float thresh, box *boxes, float **probs)
|
image coco_labels[80];
|
||||||
{
|
|
||||||
int classes = 80;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for(i = 0; i < num; ++i){
|
|
||||||
int class = max_index(probs[i], classes);
|
|
||||||
float prob = probs[i][class];
|
|
||||||
if(prob > thresh){
|
|
||||||
int width = sqrt(prob)*5 + 1;
|
|
||||||
printf("%f %s\n", prob, coco_classes[class]);
|
|
||||||
float red = get_color(0,class,classes);
|
|
||||||
float green = get_color(1,class,classes);
|
|
||||||
float blue = get_color(2,class,classes);
|
|
||||||
box b = boxes[i];
|
|
||||||
|
|
||||||
int left = (b.x-b.w/2.)*im.w;
|
|
||||||
int right = (b.x+b.w/2.)*im.w;
|
|
||||||
int top = (b.y-b.h/2.)*im.h;
|
|
||||||
int bot = (b.y+b.h/2.)*im.h;
|
|
||||||
draw_box_width(im, left, top, right, bot, width, red, green, blue);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void train_coco(char *cfgfile, char *weightfile)
|
void train_coco(char *cfgfile, char *weightfile)
|
||||||
{
|
{
|
||||||
@ -368,6 +345,7 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
|
|||||||
detection_layer l = net.layers[net.n-1];
|
detection_layer l = net.layers[net.n-1];
|
||||||
set_batch_network(&net, 1);
|
set_batch_network(&net, 1);
|
||||||
srand(2222222);
|
srand(2222222);
|
||||||
|
float nms = .4;
|
||||||
clock_t time;
|
clock_t time;
|
||||||
char buff[256];
|
char buff[256];
|
||||||
char *input = buff;
|
char *input = buff;
|
||||||
@ -392,7 +370,8 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
|
|||||||
float *predictions = network_predict(net, X);
|
float *predictions = network_predict(net, X);
|
||||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||||
convert_coco_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0);
|
convert_coco_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0);
|
||||||
draw_coco(im, l.side*l.side*l.n, thresh, boxes, probs);
|
if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms);
|
||||||
|
draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, coco_classes, coco_labels, 80);
|
||||||
show_image(im, "predictions");
|
show_image(im, "predictions");
|
||||||
|
|
||||||
show_image(sized, "resized");
|
show_image(sized, "resized");
|
||||||
@ -406,9 +385,23 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef OPENCV
|
||||||
|
#ifdef GPU
|
||||||
|
void demo_coco(char *cfgfile, char *weightfile, float thresh, int cam_index);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
void run_coco(int argc, char **argv)
|
void run_coco(int argc, char **argv)
|
||||||
{
|
{
|
||||||
|
int i;
|
||||||
|
for(i = 0; i < 80; ++i){
|
||||||
|
char buff[256];
|
||||||
|
sprintf(buff, "data/labels/%s.png", coco_classes[i]);
|
||||||
|
coco_labels[i] = load_image_color(buff, 0, 0);
|
||||||
|
}
|
||||||
float thresh = find_float_arg(argc, argv, "-thresh", .2);
|
float thresh = find_float_arg(argc, argv, "-thresh", .2);
|
||||||
|
int cam_index = find_int_arg(argc, argv, "-c", 0);
|
||||||
|
|
||||||
if(argc < 4){
|
if(argc < 4){
|
||||||
fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
|
fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
|
||||||
return;
|
return;
|
||||||
@ -421,4 +414,9 @@ void run_coco(int argc, char **argv)
|
|||||||
else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights);
|
else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights);
|
||||||
else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights);
|
else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights);
|
||||||
else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights);
|
else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights);
|
||||||
|
#ifdef OPENCV
|
||||||
|
#ifdef GPU
|
||||||
|
else if(0==strcmp(argv[2], "demo")) demo_coco(cfg, weights, thresh, cam_index);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -10,6 +10,7 @@ extern "C" {
|
|||||||
#include "parser.h"
|
#include "parser.h"
|
||||||
#include "box.h"
|
#include "box.h"
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
|
#include <sys/time.h>
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef OPENCV
|
#ifdef OPENCV
|
||||||
@ -17,7 +18,9 @@ extern "C" {
|
|||||||
#include "opencv2/imgproc/imgproc.hpp"
|
#include "opencv2/imgproc/imgproc.hpp"
|
||||||
extern "C" image ipl_to_image(IplImage* src);
|
extern "C" image ipl_to_image(IplImage* src);
|
||||||
extern "C" void convert_coco_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
|
extern "C" void convert_coco_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
|
||||||
extern "C" void draw_coco(image im, int num, float thresh, box *boxes, float **probs);
|
|
||||||
|
extern "C" char *coco_classes[];
|
||||||
|
// coco_labels is defined in coco.c as an array (`image coco_labels[80]`), so it
// has to be declared as an array here as well, like coco_classes above. Declaring
// it as `image *` would make this file treat the first bytes of the array as a
// pointer value instead of using the array's own address.
extern "C" image coco_labels[];
|
||||||
|
|
||||||
static float **probs;
|
static float **probs;
|
||||||
static box *boxes;
|
static box *boxes;
|
||||||
@ -27,9 +30,10 @@ static image in_s ;
|
|||||||
static image det ;
|
static image det ;
|
||||||
static image det_s;
|
static image det_s;
|
||||||
static image disp ;
|
static image disp ;
|
||||||
static cv::VideoCapture cap(0);
|
static cv::VideoCapture cap;
|
||||||
|
static float fps = 0;
|
||||||
|
|
||||||
void *fetch_in_thread(void *ptr)
|
void *fetch_in_thread_coco(void *ptr)
|
||||||
{
|
{
|
||||||
cv::Mat frame_m;
|
cv::Mat frame_m;
|
||||||
cap >> frame_m;
|
cap >> frame_m;
|
||||||
@ -40,7 +44,7 @@ void *fetch_in_thread(void *ptr)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void *detect_in_thread(void *ptr)
|
void *detect_in_thread_coco(void *ptr)
|
||||||
{
|
{
|
||||||
float nms = .4;
|
float nms = .4;
|
||||||
float thresh = .2;
|
float thresh = .2;
|
||||||
@ -53,12 +57,13 @@ void *detect_in_thread(void *ptr)
|
|||||||
if (nms > 0) do_nms(boxes, probs, l.side*l.side*l.n, l.classes, nms);
|
if (nms > 0) do_nms(boxes, probs, l.side*l.side*l.n, l.classes, nms);
|
||||||
printf("\033[2J");
|
printf("\033[2J");
|
||||||
printf("\033[1;1H");
|
printf("\033[1;1H");
|
||||||
printf("\nObjects:\n\n");
|
printf("\nFPS:%.0f\n",fps);
|
||||||
draw_coco(det, l.side*l.side*l.n, thresh, boxes, probs);
|
printf("Objects:\n\n");
|
||||||
|
draw_detections(det, l.side*l.side*l.n, thresh, boxes, probs, coco_classes, coco_labels, 80);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void demo_coco(char *cfgfile, char *weightfile, float thresh)
|
extern "C" void demo_coco(char *cfgfile, char *weightfile, float thresh, int cam_index)
|
||||||
{
|
{
|
||||||
printf("YOLO demo\n");
|
printf("YOLO demo\n");
|
||||||
net = parse_network_cfg(cfgfile);
|
net = parse_network_cfg(cfgfile);
|
||||||
@ -69,6 +74,8 @@ extern "C" void demo_coco(char *cfgfile, char *weightfile, float thresh)
|
|||||||
|
|
||||||
srand(2222222);
|
srand(2222222);
|
||||||
|
|
||||||
|
cv::VideoCapture cam(cam_index);
|
||||||
|
cap = cam;
|
||||||
if(!cap.isOpened()) error("Couldn't connect to webcam.\n");
|
if(!cap.isOpened()) error("Couldn't connect to webcam.\n");
|
||||||
|
|
||||||
detection_layer l = net.layers[net.n-1];
|
detection_layer l = net.layers[net.n-1];
|
||||||
@ -81,19 +88,21 @@ extern "C" void demo_coco(char *cfgfile, char *weightfile, float thresh)
|
|||||||
pthread_t fetch_thread;
|
pthread_t fetch_thread;
|
||||||
pthread_t detect_thread;
|
pthread_t detect_thread;
|
||||||
|
|
||||||
fetch_in_thread(0);
|
fetch_in_thread_coco(0);
|
||||||
det = in;
|
det = in;
|
||||||
det_s = in_s;
|
det_s = in_s;
|
||||||
|
|
||||||
fetch_in_thread(0);
|
fetch_in_thread_coco(0);
|
||||||
detect_in_thread(0);
|
detect_in_thread_coco(0);
|
||||||
disp = det;
|
disp = det;
|
||||||
det = in;
|
det = in;
|
||||||
det_s = in_s;
|
det_s = in_s;
|
||||||
|
|
||||||
while(1){
|
while(1){
|
||||||
if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
|
struct timeval tval_before, tval_after, tval_result;
|
||||||
if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");
|
gettimeofday(&tval_before, NULL);
|
||||||
|
if(pthread_create(&fetch_thread, 0, fetch_in_thread_coco, 0)) error("Thread creation failed");
|
||||||
|
if(pthread_create(&detect_thread, 0, detect_in_thread_coco, 0)) error("Thread creation failed");
|
||||||
show_image(disp, "YOLO");
|
show_image(disp, "YOLO");
|
||||||
free_image(disp);
|
free_image(disp);
|
||||||
cvWaitKey(1);
|
cvWaitKey(1);
|
||||||
@ -103,10 +112,15 @@ extern "C" void demo_coco(char *cfgfile, char *weightfile, float thresh)
|
|||||||
disp = det;
|
disp = det;
|
||||||
det = in;
|
det = in;
|
||||||
det_s = in_s;
|
det_s = in_s;
|
||||||
|
|
||||||
|
gettimeofday(&tval_after, NULL);
|
||||||
|
timersub(&tval_after, &tval_before, &tval_result);
|
||||||
|
float curr = 1000000.f/((long int)tval_result.tv_usec);
|
||||||
|
fps = .9*fps + .1*curr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
extern "C" void demo_coco(char *cfgfile, char *weightfile, float thresh){
|
extern "C" void demo_coco(char *cfgfile, char *weightfile, float thresh, int cam_index){
|
||||||
fprintf(stderr, "YOLO-COCO demo needs OpenCV for webcam images.\n");
|
fprintf(stderr, "YOLO-COCO demo needs OpenCV for webcam images.\n");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
77
src/image.c
77
src/image.c
@ -28,6 +28,26 @@ float get_color(int c, int x, int max)
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stamps a label image onto `a`, scaling each label channel k by rgb[k].
//
// a     - destination image, written in place through set_pixel.
// r, c  - row and column of the anchor in `a`. When there is room above
//         (r - label.h >= 0) the label is moved up by its own height so that
//         it ends just above row r; otherwise it starts at row r.
// label - label bitmap; a resized copy is read, the argument is not modified.
// rgb   - per-channel multipliers, indexed 0..label.c-1, so it must hold at
//         least label.c entries (draw_detections passes 3).
//
// Pixels that would land outside `a` on any side are skipped.
void draw_label(image a, int r, int c, image label, const float *rgb)
{
    // An empty label has nothing to draw. Returning early also avoids the
    // division by zero below and the undefined float-to-int conversion that
    // would follow from it.
    if (label.w <= 0 || label.h <= 0) return;

    float ratio = (float) label.w / label.h;
    int h = label.h;
    int w = ratio * h;
    image rl = resize_image(label, w, h);
    if (r - h >= 0) r = r - h;

    // Begin past any rows/columns that fall above or left of the destination
    // so set_pixel is never handed a negative coordinate.
    int j0 = (r < 0) ? -r : 0;
    int i0 = (c < 0) ? -c : 0;

    int i, j, k;
    for(j = j0; j < h && j + r < a.h; ++j){
        for(i = i0; i < w && i + c < a.w; ++i){
            for(k = 0; k < label.c; ++k){
                float val = get_pixel(rl, i, j, k);
                set_pixel(a, i+c, j+r, k, rgb[k] * val);
            }
        }
    }
    free_image(rl);
}
|
||||||
|
|
||||||
void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b)
|
void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b)
|
||||||
{
|
{
|
||||||
//normalize_image(a);
|
//normalize_image(a);
|
||||||
@ -42,25 +62,25 @@ void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b
|
|||||||
if(y2 < 0) y2 = 0;
|
if(y2 < 0) y2 = 0;
|
||||||
if(y2 >= a.h) y2 = a.h-1;
|
if(y2 >= a.h) y2 = a.h-1;
|
||||||
|
|
||||||
for(i = x1; i < x2; ++i){
|
for(i = x1; i <= x2; ++i){
|
||||||
a.data[i + y1*a.w + 0*a.w*a.h] = b;
|
a.data[i + y1*a.w + 0*a.w*a.h] = r;
|
||||||
a.data[i + y2*a.w + 0*a.w*a.h] = b;
|
a.data[i + y2*a.w + 0*a.w*a.h] = r;
|
||||||
|
|
||||||
a.data[i + y1*a.w + 1*a.w*a.h] = g;
|
a.data[i + y1*a.w + 1*a.w*a.h] = g;
|
||||||
a.data[i + y2*a.w + 1*a.w*a.h] = g;
|
a.data[i + y2*a.w + 1*a.w*a.h] = g;
|
||||||
|
|
||||||
a.data[i + y1*a.w + 2*a.w*a.h] = r;
|
a.data[i + y1*a.w + 2*a.w*a.h] = b;
|
||||||
a.data[i + y2*a.w + 2*a.w*a.h] = r;
|
a.data[i + y2*a.w + 2*a.w*a.h] = b;
|
||||||
}
|
}
|
||||||
for(i = y1; i < y2; ++i){
|
for(i = y1; i <= y2; ++i){
|
||||||
a.data[x1 + i*a.w + 0*a.w*a.h] = b;
|
a.data[x1 + i*a.w + 0*a.w*a.h] = r;
|
||||||
a.data[x2 + i*a.w + 0*a.w*a.h] = b;
|
a.data[x2 + i*a.w + 0*a.w*a.h] = r;
|
||||||
|
|
||||||
a.data[x1 + i*a.w + 1*a.w*a.h] = g;
|
a.data[x1 + i*a.w + 1*a.w*a.h] = g;
|
||||||
a.data[x2 + i*a.w + 1*a.w*a.h] = g;
|
a.data[x2 + i*a.w + 1*a.w*a.h] = g;
|
||||||
|
|
||||||
a.data[x1 + i*a.w + 2*a.w*a.h] = r;
|
a.data[x1 + i*a.w + 2*a.w*a.h] = b;
|
||||||
a.data[x2 + i*a.w + 2*a.w*a.h] = r;
|
a.data[x2 + i*a.w + 2*a.w*a.h] = b;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -85,6 +105,43 @@ void draw_bbox(image a, box bbox, int w, float r, float g, float b)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Draws every detection whose best class score exceeds `thresh` onto `im`
// and prints "<name>: <score>" for each one.
//
// im      - image drawn on in place.
// num     - number of entries in `boxes` / `probs`.
// thresh  - a detection is drawn only if its top score is strictly greater.
// boxes   - one box per detection; x,y are treated as the centre and w,h as
//           the size, each multiplied by the image width/height.
// probs   - probs[n] holds `classes` scores for detection n.
// names   - class names, indexed by class id.
// labels  - optional label images indexed by class id; NULL disables labels.
// classes - number of classes.
void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image *labels, int classes)
{
    int n;

    for (n = 0; n < num; ++n) {
        int best = max_index(probs[n], classes);
        float score = probs[n][best];
        if (!(score > thresh)) continue;

        // Line thickness grows with the square root of the score.
        int thickness = pow(score, 1./2.)*10+1;
        printf("%s: %.2f\n", names[best], score);

        // Spread class ids over the colour range so that neighbouring ids
        // do not end up with neighbouring colours.
        int hue = best*17 % classes;
        float red = get_color(0,hue,classes);
        float green = get_color(1,hue,classes);
        float blue = get_color(2,hue,classes);
        float rgb[3] = { red, green, blue };

        box bx = boxes[n];
        int x_min = (bx.x-bx.w/2.)*im.w;
        int x_max = (bx.x+bx.w/2.)*im.w;
        int y_min = (bx.y-bx.h/2.)*im.h;
        int y_max = (bx.y+bx.h/2.)*im.h;

        // Clamp the box to the image bounds.
        if (x_min < 0) x_min = 0;
        if (x_max > im.w-1) x_max = im.w-1;
        if (y_min < 0) y_min = 0;
        if (y_max > im.h-1) y_max = im.h-1;

        draw_box_width(im, x_min, y_min, x_max, y_max, thickness, red, green, blue);
        if (labels) draw_label(im, y_min + thickness, x_min, labels[best], rgb);
    }
}
|
||||||
|
|
||||||
|
|
||||||
void flip_image(image a)
|
void flip_image(image a)
|
||||||
{
|
{
|
||||||
int i,j,k;
|
int i,j,k;
|
||||||
|
@ -20,6 +20,8 @@ void flip_image(image a);
|
|||||||
void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b);
|
void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b);
|
||||||
void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
|
void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
|
||||||
void draw_bbox(image a, box bbox, int w, float r, float g, float b);
|
void draw_bbox(image a, box bbox, int w, float r, float g, float b);
|
||||||
|
void draw_label(image a, int r, int c, image label, const float *rgb);
|
||||||
|
void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image *labels, int classes);
|
||||||
image image_distance(image a, image b);
|
image image_distance(image a, image b);
|
||||||
void scale_image(image m, float s);
|
void scale_image(image m, float s);
|
||||||
image crop_image(image im, int dx, int dy, int w, int h);
|
image crop_image(image im, int dx, int dy, int w, int h);
|
||||||
|
@ -130,11 +130,12 @@ typedef struct {
|
|||||||
float * x_gpu;
|
float * x_gpu;
|
||||||
float * x_norm_gpu;
|
float * x_norm_gpu;
|
||||||
float * weights_gpu;
|
float * weights_gpu;
|
||||||
float * biases_gpu;
|
|
||||||
float * scales_gpu;
|
|
||||||
|
|
||||||
float * weight_updates_gpu;
|
float * weight_updates_gpu;
|
||||||
|
|
||||||
|
float * biases_gpu;
|
||||||
float * bias_updates_gpu;
|
float * bias_updates_gpu;
|
||||||
|
|
||||||
|
float * scales_gpu;
|
||||||
float * scale_updates_gpu;
|
float * scale_updates_gpu;
|
||||||
|
|
||||||
float * output_gpu;
|
float * output_gpu;
|
||||||
|
@ -26,6 +26,17 @@ int get_current_batch(network net)
|
|||||||
return batch_num;
|
return batch_num;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs one GPU update step with learning rate, momentum and decay all set to
// zero; does nothing when the network's momentum is already zero.
//
// `net` is received by value, so the zeroed fields live only in this function's
// copy and the caller's network keeps its settings. In builds without GPU
// defined, nothing here is visible to the caller.
// NOTE(review): presumably the zero-momentum update is meant to clear the
// accumulated momentum buffers on the device - confirm against
// update_network_gpu.
void reset_momentum(network net)
{
    if (net.momentum != 0) {
        net.decay = 0;
        net.momentum = 0;
        net.learning_rate = 0;
#ifdef GPU
        if (gpu_index >= 0) {
            update_network_gpu(net);
        }
#endif
    }
}
|
||||||
|
|
||||||
float get_current_rate(network net)
|
float get_current_rate(network net)
|
||||||
{
|
{
|
||||||
int batch_num = get_current_batch(net);
|
int batch_num = get_current_batch(net);
|
||||||
@ -41,6 +52,7 @@ float get_current_rate(network net)
|
|||||||
for(i = 0; i < net.num_steps; ++i){
|
for(i = 0; i < net.num_steps; ++i){
|
||||||
if(net.steps[i] > batch_num) return rate;
|
if(net.steps[i] > batch_num) return rate;
|
||||||
rate *= net.scales[i];
|
rate *= net.scales[i];
|
||||||
|
if(net.steps[i] > batch_num - 1) reset_momentum(net);
|
||||||
}
|
}
|
||||||
return rate;
|
return rate;
|
||||||
case EXP:
|
case EXP:
|
||||||
|
@ -51,6 +51,7 @@ float * get_network_delta_gpu_layer(network net, int i);
|
|||||||
float *get_network_output_gpu(network net);
|
float *get_network_output_gpu(network net);
|
||||||
void forward_network_gpu(network net, network_state state);
|
void forward_network_gpu(network net, network_state state);
|
||||||
void backward_network_gpu(network net, network_state state);
|
void backward_network_gpu(network net, network_state state);
|
||||||
|
void update_network_gpu(network net);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
float get_current_rate(network net);
|
float get_current_rate(network net);
|
||||||
|
36
src/yolo.c
36
src/yolo.c
@ -11,40 +11,6 @@
|
|||||||
|
|
||||||
char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
|
char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
|
||||||
|
|
||||||
void draw_yolo(image im, int num, float thresh, box *boxes, float **probs)
|
|
||||||
{
|
|
||||||
int classes = 20;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for(i = 0; i < num; ++i){
|
|
||||||
int class = max_index(probs[i], classes);
|
|
||||||
float prob = probs[i][class];
|
|
||||||
if(prob > thresh){
|
|
||||||
int width = pow(prob, 1./2.)*10+1;
|
|
||||||
width = 8;
|
|
||||||
printf("%s: %.2f\n", voc_names[class], prob);
|
|
||||||
class = class * 7 % 20;
|
|
||||||
float red = get_color(0,class,classes);
|
|
||||||
float green = get_color(1,class,classes);
|
|
||||||
float blue = get_color(2,class,classes);
|
|
||||||
//red = green = blue = 0;
|
|
||||||
box b = boxes[i];
|
|
||||||
|
|
||||||
int left = (b.x-b.w/2.)*im.w;
|
|
||||||
int right = (b.x+b.w/2.)*im.w;
|
|
||||||
int top = (b.y-b.h/2.)*im.h;
|
|
||||||
int bot = (b.y+b.h/2.)*im.h;
|
|
||||||
|
|
||||||
if(left < 0) left = 0;
|
|
||||||
if(right > im.w-1) right = im.w-1;
|
|
||||||
if(top < 0) top = 0;
|
|
||||||
if(bot > im.h-1) bot = im.h-1;
|
|
||||||
|
|
||||||
draw_box_width(im, left, top, right, bot, width, red, green, blue);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void train_yolo(char *cfgfile, char *weightfile)
|
void train_yolo(char *cfgfile, char *weightfile)
|
||||||
{
|
{
|
||||||
char *train_images = "data/voc.0712.trainval";
|
char *train_images = "data/voc.0712.trainval";
|
||||||
@ -377,7 +343,7 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
|
|||||||
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
|
||||||
convert_yolo_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0);
|
convert_yolo_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0);
|
||||||
if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms);
|
if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms);
|
||||||
draw_yolo(im, l.side*l.side*l.n, thresh, boxes, probs);
|
draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, 0, 20);
|
||||||
show_image(im, "predictions");
|
show_image(im, "predictions");
|
||||||
|
|
||||||
show_image(sized, "resized");
|
show_image(sized, "resized");
|
||||||
|
@ -20,6 +20,8 @@ extern "C" image ipl_to_image(IplImage* src);
|
|||||||
extern "C" void convert_yolo_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
|
extern "C" void convert_yolo_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
|
||||||
extern "C" void draw_yolo(image im, int num, float thresh, box *boxes, float **probs);
|
extern "C" void draw_yolo(image im, int num, float thresh, box *boxes, float **probs);
|
||||||
|
|
||||||
|
extern "C" char *voc_names[];
|
||||||
|
|
||||||
static float **probs;
|
static float **probs;
|
||||||
static box *boxes;
|
static box *boxes;
|
||||||
static network net;
|
static network net;
|
||||||
@ -57,7 +59,7 @@ void *detect_in_thread(void *ptr)
|
|||||||
printf("\033[1;1H");
|
printf("\033[1;1H");
|
||||||
printf("\nFPS:%.0f\n",fps);
|
printf("\nFPS:%.0f\n",fps);
|
||||||
printf("Objects:\n\n");
|
printf("Objects:\n\n");
|
||||||
draw_yolo(det, l.side*l.side*l.n, thresh, boxes, probs);
|
draw_detections(det, l.side*l.side*l.n, thresh, boxes, probs, voc_names, 0, 20);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user