mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Extracting features from VOC with temp filters
This commit is contained in:
parent
118bdd6f62
commit
228d3663f8
@ -3,11 +3,21 @@
|
|||||||
#include "mini_blas.h"
|
#include "mini_blas.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
|
int convolutional_out_height(convolutional_layer layer)
|
||||||
|
{
|
||||||
|
return (layer.h-layer.size)/layer.stride + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int convolutional_out_width(convolutional_layer layer)
|
||||||
|
{
|
||||||
|
return (layer.w-layer.size)/layer.stride + 1;
|
||||||
|
}
|
||||||
|
|
||||||
image get_convolutional_image(convolutional_layer layer)
|
image get_convolutional_image(convolutional_layer layer)
|
||||||
{
|
{
|
||||||
int h,w,c;
|
int h,w,c;
|
||||||
h = layer.out_h;
|
h = convolutional_out_height(layer);
|
||||||
w = layer.out_w;
|
w = convolutional_out_width(layer);
|
||||||
c = layer.n;
|
c = layer.n;
|
||||||
return float_to_image(h,w,c,layer.output);
|
return float_to_image(h,w,c,layer.output);
|
||||||
}
|
}
|
||||||
@ -15,8 +25,8 @@ image get_convolutional_image(convolutional_layer layer)
|
|||||||
image get_convolutional_delta(convolutional_layer layer)
|
image get_convolutional_delta(convolutional_layer layer)
|
||||||
{
|
{
|
||||||
int h,w,c;
|
int h,w,c;
|
||||||
h = layer.out_h;
|
h = convolutional_out_height(layer);
|
||||||
w = layer.out_w;
|
w = convolutional_out_width(layer);
|
||||||
c = layer.n;
|
c = layer.n;
|
||||||
return float_to_image(h,w,c,layer.delta);
|
return float_to_image(h,w,c,layer.delta);
|
||||||
}
|
}
|
||||||
@ -24,7 +34,6 @@ image get_convolutional_delta(convolutional_layer layer)
|
|||||||
convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int size, int stride, ACTIVATION activation)
|
convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int size, int stride, ACTIVATION activation)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int out_h,out_w;
|
|
||||||
size = 2*(size/2)+1; //HA! And you thought you'd use an even sized filter...
|
size = 2*(size/2)+1; //HA! And you thought you'd use an even sized filter...
|
||||||
convolutional_layer *layer = calloc(1, sizeof(convolutional_layer));
|
convolutional_layer *layer = calloc(1, sizeof(convolutional_layer));
|
||||||
layer->h = h;
|
layer->h = h;
|
||||||
@ -47,15 +56,13 @@ convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int si
|
|||||||
//layer->biases[i] = rand_normal()*scale + scale;
|
//layer->biases[i] = rand_normal()*scale + scale;
|
||||||
layer->biases[i] = 0;
|
layer->biases[i] = 0;
|
||||||
}
|
}
|
||||||
out_h = (h-size)/stride + 1;
|
int out_h = (h-size)/stride + 1;
|
||||||
out_w = (w-size)/stride + 1;
|
int out_w = (w-size)/stride + 1;
|
||||||
|
|
||||||
layer->col_image = calloc(out_h*out_w*size*size*c, sizeof(float));
|
layer->col_image = calloc(out_h*out_w*size*size*c, sizeof(float));
|
||||||
layer->output = calloc(out_h * out_w * n, sizeof(float));
|
layer->output = calloc(out_h * out_w * n, sizeof(float));
|
||||||
layer->delta = calloc(out_h * out_w * n, sizeof(float));
|
layer->delta = calloc(out_h * out_w * n, sizeof(float));
|
||||||
layer->activation = activation;
|
layer->activation = activation;
|
||||||
layer->out_h = out_h;
|
|
||||||
layer->out_w = out_w;
|
|
||||||
|
|
||||||
fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
|
fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
|
||||||
srand(0);
|
srand(0);
|
||||||
@ -90,7 +97,10 @@ void forward_convolutional_layer(const convolutional_layer layer, float *in)
|
|||||||
void gradient_delta_convolutional_layer(convolutional_layer layer)
|
void gradient_delta_convolutional_layer(convolutional_layer layer)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < layer.out_h*layer.out_w*layer.n; ++i){
|
int size = convolutional_out_height(layer)
|
||||||
|
*convolutional_out_width(layer)
|
||||||
|
*layer.n;
|
||||||
|
for(i = 0; i < size; ++i){
|
||||||
layer.delta[i] *= gradient(layer.output[i], layer.activation);
|
layer.delta[i] *= gradient(layer.output[i], layer.activation);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -98,7 +108,8 @@ void gradient_delta_convolutional_layer(convolutional_layer layer)
|
|||||||
void learn_bias_convolutional_layer(convolutional_layer layer)
|
void learn_bias_convolutional_layer(convolutional_layer layer)
|
||||||
{
|
{
|
||||||
int i,j;
|
int i,j;
|
||||||
int size = layer.out_h*layer.out_w;
|
int size = convolutional_out_height(layer)
|
||||||
|
*convolutional_out_width(layer);
|
||||||
for(i = 0; i < layer.n; ++i){
|
for(i = 0; i < layer.n; ++i){
|
||||||
float sum = 0;
|
float sum = 0;
|
||||||
for(j = 0; j < size; ++j){
|
for(j = 0; j < size; ++j){
|
||||||
|
@ -6,7 +6,6 @@
|
|||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int h,w,c;
|
int h,w,c;
|
||||||
int out_h, out_w, out_c;
|
|
||||||
int n;
|
int n;
|
||||||
int size;
|
int size;
|
||||||
int stride;
|
int stride;
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
#include "data.h"
|
#include "data.h"
|
||||||
#include "list.h"
|
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
#define DATA_H
|
#define DATA_H
|
||||||
|
|
||||||
#include "matrix.h"
|
#include "matrix.h"
|
||||||
|
#include "list.h"
|
||||||
|
|
||||||
typedef struct{
|
typedef struct{
|
||||||
matrix X;
|
matrix X;
|
||||||
@ -16,6 +17,7 @@ data load_data_image_pathfile_part(char *filename, int part, int total,
|
|||||||
char **labels, int k, int h, int w);
|
char **labels, int k, int h, int w);
|
||||||
data load_data_image_pathfile_random(char *filename, int n, char **labels,
|
data load_data_image_pathfile_random(char *filename, int n, char **labels,
|
||||||
int k, int h, int w);
|
int k, int h, int w);
|
||||||
|
list *get_paths(char *filename);
|
||||||
data load_categorical_data_csv(char *filename, int target, int k);
|
data load_categorical_data_csv(char *filename, int target, int k);
|
||||||
void normalize_data_rows(data d);
|
void normalize_data_rows(data d);
|
||||||
void scale_data_rows(data d, float s);
|
void scale_data_rows(data d, float s);
|
||||||
|
31
src/image.c
31
src/image.c
@ -342,21 +342,11 @@ IplImage* resizeImage(const IplImage *origImg, int newHeight, int newWidth,
|
|||||||
return outImg;
|
return outImg;
|
||||||
}
|
}
|
||||||
|
|
||||||
image load_image(char *filename, int h, int w)
|
image ipl_to_image(IplImage* src)
|
||||||
{
|
{
|
||||||
IplImage* src = 0;
|
|
||||||
if( (src = cvLoadImage(filename,-1)) == 0 )
|
|
||||||
{
|
|
||||||
printf("Cannot load file image %s\n", filename);
|
|
||||||
exit(0);
|
|
||||||
}
|
|
||||||
cvShowImage("Orig", src);
|
|
||||||
IplImage *resized = resizeImage(src, h, w, 1);
|
|
||||||
cvShowImage("Sized", resized);
|
|
||||||
cvWaitKey(0);
|
|
||||||
cvReleaseImage(&src);
|
|
||||||
src = resized;
|
|
||||||
unsigned char *data = (unsigned char *)src->imageData;
|
unsigned char *data = (unsigned char *)src->imageData;
|
||||||
|
int h = src->height;
|
||||||
|
int w = src->width;
|
||||||
int c = src->nChannels;
|
int c = src->nChannels;
|
||||||
int step = src->widthStep;
|
int step = src->widthStep;
|
||||||
image out = make_image(h,w,c);
|
image out = make_image(h,w,c);
|
||||||
@ -369,6 +359,21 @@ image load_image(char *filename, int h, int w)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
image load_image(char *filename, int h, int w)
|
||||||
|
{
|
||||||
|
IplImage* src = 0;
|
||||||
|
if( (src = cvLoadImage(filename,-1)) == 0 )
|
||||||
|
{
|
||||||
|
printf("Cannot load file image %s\n", filename);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
IplImage *resized = resizeImage(src, h, w, 1);
|
||||||
|
cvReleaseImage(&src);
|
||||||
|
src = resized;
|
||||||
|
image out = ipl_to_image(src);
|
||||||
cvReleaseImage(&src);
|
cvReleaseImage(&src);
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
@ -34,6 +34,7 @@ image make_random_kernel(int size, int c, float scale);
|
|||||||
image float_to_image(int h, int w, int c, float *data);
|
image float_to_image(int h, int w, int c, float *data);
|
||||||
image copy_image(image p);
|
image copy_image(image p);
|
||||||
image load_image(char *filename, int h, int w);
|
image load_image(char *filename, int h, int w);
|
||||||
|
image ipl_to_image(IplImage* src);
|
||||||
|
|
||||||
float get_pixel(image m, int x, int y, int c);
|
float get_pixel(image m, int x, int y, int c);
|
||||||
float get_pixel_extend(image m, int x, int y, int c);
|
float get_pixel_extend(image m, int x, int y, int c);
|
||||||
|
@ -331,6 +331,34 @@ int get_network_output_size_layer(network net, int i)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int reset_network_size(network net, int h, int w, int c)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < net.n; ++i){
|
||||||
|
if(net.types[i] == CONVOLUTIONAL){
|
||||||
|
convolutional_layer *layer = (convolutional_layer *)net.layers[i];
|
||||||
|
layer->h = h;
|
||||||
|
layer->w = w;
|
||||||
|
layer->c = c;
|
||||||
|
image output = get_convolutional_image(*layer);
|
||||||
|
h = output.h;
|
||||||
|
w = output.w;
|
||||||
|
c = output.c;
|
||||||
|
}
|
||||||
|
else if(net.types[i] == MAXPOOL){
|
||||||
|
maxpool_layer *layer = (maxpool_layer *)net.layers[i];
|
||||||
|
layer->h = h;
|
||||||
|
layer->w = w;
|
||||||
|
layer->c = c;
|
||||||
|
image output = get_maxpool_image(*layer);
|
||||||
|
h = output.h;
|
||||||
|
w = output.w;
|
||||||
|
c = output.c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int get_network_output_size(network net)
|
int get_network_output_size(network net)
|
||||||
{
|
{
|
||||||
int i = net.n-1;
|
int i = net.n-1;
|
||||||
|
@ -41,6 +41,7 @@ int get_predicted_class_network(network net);
|
|||||||
void print_network(network net);
|
void print_network(network net);
|
||||||
void visualize_network(network net);
|
void visualize_network(network net);
|
||||||
void save_network(network net, char *filename);
|
void save_network(network net, char *filename);
|
||||||
|
int reset_network_size(network net, int h, int w, int c);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
62
src/tests.c
62
src/tests.c
@ -366,20 +366,21 @@ void test_im2row()
|
|||||||
|
|
||||||
void train_VOC()
|
void train_VOC()
|
||||||
{
|
{
|
||||||
network net = parse_network_cfg("cfg/voc_backup_ramp_80.cfg");
|
network net = parse_network_cfg("cfg/voc_backup_sig_20.cfg");
|
||||||
srand(2222222);
|
srand(2222222);
|
||||||
int i = 0;
|
int i = 20;
|
||||||
char *labels[] = {"aeroplane","bicycle","bird","boat","bottle","bus","car","cat","chair","cow","diningtable","dog","horse","motorbike","person","pottedplant","sheep","sofa","train","tvmonitor"};
|
char *labels[] = {"aeroplane","bicycle","bird","boat","bottle","bus","car","cat","chair","cow","diningtable","dog","horse","motorbike","person","pottedplant","sheep","sofa","train","tvmonitor"};
|
||||||
float lr = .00001;
|
float lr = .00001;
|
||||||
float momentum = .9;
|
float momentum = .9;
|
||||||
float decay = 0.01;
|
float decay = 0.01;
|
||||||
while(i++ < 1000 || 1){
|
while(i++ < 1000 || 1){
|
||||||
visualize_network(net);
|
|
||||||
cvWaitKey(100);
|
|
||||||
data train = load_data_image_pathfile_random("images/VOC2012/train_paths.txt", 1000, labels, 20, 300, 400);
|
data train = load_data_image_pathfile_random("images/VOC2012/train_paths.txt", 1000, labels, 20, 300, 400);
|
||||||
|
|
||||||
image im = float_to_image(300, 400, 3,train.X.vals[0]);
|
image im = float_to_image(300, 400, 3,train.X.vals[0]);
|
||||||
show_image(im, "input");
|
show_image(im, "input");
|
||||||
|
visualize_network(net);
|
||||||
cvWaitKey(100);
|
cvWaitKey(100);
|
||||||
|
|
||||||
normalize_data_rows(train);
|
normalize_data_rows(train);
|
||||||
clock_t start = clock(), end;
|
clock_t start = clock(), end;
|
||||||
float loss = train_network_sgd(net, train, 1000, lr, momentum, decay);
|
float loss = train_network_sgd(net, train, 1000, lr, momentum, decay);
|
||||||
@ -388,13 +389,61 @@ void train_VOC()
|
|||||||
free_data(train);
|
free_data(train);
|
||||||
if(i%10==0){
|
if(i%10==0){
|
||||||
char buff[256];
|
char buff[256];
|
||||||
sprintf(buff, "cfg/voc_backup_ramp_%d.cfg", i);
|
sprintf(buff, "cfg/voc_backup_sig_%d.cfg", i);
|
||||||
save_network(net, buff);
|
save_network(net, buff);
|
||||||
}
|
}
|
||||||
//lr *= .99;
|
//lr *= .99;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void features_VOC()
|
||||||
|
{
|
||||||
|
int i,j;
|
||||||
|
network net = parse_network_cfg("cfg/voc_features.cfg");
|
||||||
|
char *path_file = "images/VOC2012/all_paths.txt";
|
||||||
|
char *out_dir = "voc_features/";
|
||||||
|
list *paths = get_paths(path_file);
|
||||||
|
node *n = paths->front;
|
||||||
|
while(n){
|
||||||
|
char *path = (char *)n->val;
|
||||||
|
char buff[1024];
|
||||||
|
sprintf(buff, "%s%s.txt",out_dir, path);
|
||||||
|
FILE *fp = fopen(buff, "w");
|
||||||
|
if(fp == 0) file_error(buff);
|
||||||
|
|
||||||
|
IplImage* src = 0;
|
||||||
|
if( (src = cvLoadImage(path,-1)) == 0 )
|
||||||
|
{
|
||||||
|
printf("Cannot load file image %s\n", path);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(i = 0; i < 10; ++i){
|
||||||
|
int w = 1024 - 90*i; //PICKED WITH CAREFUL CROSS-VALIDATION!!!!
|
||||||
|
int h = (int)((double)w/src->width * src->height);
|
||||||
|
IplImage *sized = cvCreateImage(cvSize(w,h), src->depth, src->nChannels);
|
||||||
|
cvResize(src, sized, CV_INTER_LINEAR);
|
||||||
|
image im = ipl_to_image(sized);
|
||||||
|
reset_network_size(net, im.h, im.w, im.c);
|
||||||
|
forward_network(net, im.data);
|
||||||
|
free_image(im);
|
||||||
|
image out = get_network_image_layer(net, 5);
|
||||||
|
fprintf(fp, "%d, %d, %d\n",out.c, out.h, out.w);
|
||||||
|
for(j = 0; j < out.c*out.h*out.w; ++j){
|
||||||
|
if(j != 0)fprintf(fp, ",");
|
||||||
|
fprintf(fp, "%g", out.data[j]);
|
||||||
|
}
|
||||||
|
fprintf(fp, "\n");
|
||||||
|
out.c = 1;
|
||||||
|
show_image(out, "output");
|
||||||
|
cvWaitKey(10);
|
||||||
|
cvReleaseImage(&sized);
|
||||||
|
}
|
||||||
|
fclose(fp);
|
||||||
|
n = n->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
//feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
|
//feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
|
||||||
@ -406,7 +455,8 @@ int main()
|
|||||||
//test_ensemble();
|
//test_ensemble();
|
||||||
//test_nist();
|
//test_nist();
|
||||||
//test_full();
|
//test_full();
|
||||||
train_VOC();
|
//train_VOC();
|
||||||
|
features_VOC();
|
||||||
//test_random_preprocess();
|
//test_random_preprocess();
|
||||||
//test_random_classify();
|
//test_random_classify();
|
||||||
//test_parser();
|
//test_parser();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user