mirror of https://github.com/pjreddie/darknet.git (synced 2023-08-10 21:13:14 +03:00)

...

commit c604f2d994 (parent b711627e84)

Makefile (4 changed lines)
@@ -1,5 +1,5 @@
-GPU=0
-OPENCV=0
+GPU=1
+OPENCV=1
 DEBUG=0
 
 ARCH= --gpu-architecture=compute_20 --gpu-code=compute_20
@@ -1,11 +1,11 @@
 [net]
 subdivisions=1
 inputs=256
-batch = 1
-time_steps = 1
+batch = 128
 momentum=0.9
-decay=0.0005
-max_batches = 50000000
+decay=0.001
+max_batches = 50000
+time_steps=900
 learning_rate=0.1
 
 [rnn]
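The cfg now trains 128 windows at a time, each unrolled over 900 characters, and inputs=256 one-hot encodes each byte. A minimal sketch of the one-hot layout that get_rnn_data (further down in this commit) writes into, assuming the sizes above; variable names here are illustrative:

/* Sketch of the one-hot layout used by get_rnn_data: for step j and
 * batch element i, position (j*batch + i)*256 + byte is set to 1. */
#include <stdlib.h>

int main()
{
    int batch = 128, steps = 900;              /* values from the updated cfg */
    float *x = calloc((size_t)batch*steps*256, sizeof(float));
    int i = 3, j = 7;                          /* illustrative batch element and step */
    unsigned char byte = 'a';
    x[((size_t)j*batch + i)*256 + byte] = 1;   /* step-major, then batch, then vocab */
    free(x);
    return 0;
}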
@@ -9,6 +9,7 @@ extern "C" {
 
 __device__ float linear_activate_kernel(float x){return x;}
 __device__ float logistic_activate_kernel(float x){return 1./(1. + exp(-x));}
+__device__ float loggy_activate_kernel(float x){return 2./(1. + exp(-x)) - 1;}
 __device__ float relu_activate_kernel(float x){return x*(x>0);}
 __device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
 __device__ float relie_activate_kernel(float x){return x*(x>0);}
@@ -24,6 +25,11 @@ __device__ float plse_activate_kernel(float x)
 
 __device__ float linear_gradient_kernel(float x){return 1;}
 __device__ float logistic_gradient_kernel(float x){return (1-x)*x;}
+__device__ float loggy_gradient_kernel(float x)
+{
+    float y = (x+1.)/2.;
+    return 2*(1-y)*y;
+}
 __device__ float relu_gradient_kernel(float x){return (x>0);}
 __device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);}
 __device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01;}
@@ -39,6 +45,8 @@ __device__ float activate_kernel(float x, ACTIVATION a)
             return linear_activate_kernel(x);
         case LOGISTIC:
             return logistic_activate_kernel(x);
+        case LOGGY:
+            return loggy_activate_kernel(x);
         case RELU:
             return relu_activate_kernel(x);
         case ELU:
@@ -64,6 +72,8 @@ __device__ float gradient_kernel(float x, ACTIVATION a)
             return linear_gradient_kernel(x);
         case LOGISTIC:
             return logistic_gradient_kernel(x);
+        case LOGGY:
+            return loggy_gradient_kernel(x);
         case RELU:
             return relu_gradient_kernel(x);
         case ELU:
@@ -10,6 +10,8 @@ char *get_activation_string(ACTIVATION a)
     switch(a){
         case LOGISTIC:
             return "logistic";
+        case LOGGY:
+            return "loggy";
         case RELU:
             return "relu";
         case ELU:
@@ -35,6 +37,7 @@ char *get_activation_string(ACTIVATION a)
 ACTIVATION get_activation(char *s)
 {
     if (strcmp(s, "logistic")==0) return LOGISTIC;
+    if (strcmp(s, "loggy")==0) return LOGGY;
     if (strcmp(s, "relu")==0) return RELU;
     if (strcmp(s, "elu")==0) return ELU;
     if (strcmp(s, "relie")==0) return RELIE;
@@ -54,6 +57,8 @@ float activate(float x, ACTIVATION a)
             return linear_activate(x);
         case LOGISTIC:
             return logistic_activate(x);
+        case LOGGY:
+            return loggy_activate(x);
         case RELU:
             return relu_activate(x);
         case ELU:
@@ -87,6 +92,8 @@ float gradient(float x, ACTIVATION a)
             return linear_gradient(x);
         case LOGISTIC:
             return logistic_gradient(x);
+        case LOGGY:
+            return loggy_gradient(x);
         case RELU:
             return relu_gradient(x);
         case ELU:
@@ -4,7 +4,7 @@
 #include "math.h"
 
 typedef enum{
-    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU
+    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY
 }ACTIVATION;
 
 ACTIVATION get_activation(char *s);
@@ -21,6 +21,7 @@ void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta);
 
 static inline float linear_activate(float x){return x;}
 static inline float logistic_activate(float x){return 1./(1. + exp(-x));}
+static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}
 static inline float relu_activate(float x){return x*(x>0);}
 static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
 static inline float relie_activate(float x){return x*(x>0);}
@@ -36,6 +37,11 @@ static inline float plse_activate(float x)
 
 static inline float linear_gradient(float x){return 1;}
 static inline float logistic_gradient(float x){return (1-x)*x;}
+static inline float loggy_gradient(float x)
+{
+    float y = (x+1.)/2.;
+    return 2*(1-y)*y;
+}
 static inline float relu_gradient(float x){return (x>0);}
 static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);}
 static inline float relie_gradient(float x){return (x>0) ? 1 : .01;}
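The new LOGGY activation is 2*sigmoid(x) - 1, which is the same as tanh(x/2). Like the other darknet gradient helpers (compare logistic_gradient), loggy_gradient is evaluated on the layer's output rather than its pre-activation input: recovering sigmoid as y = (out+1)/2 gives the derivative 2*y*(1-y). A standalone check of that identity, not part of the commit:

/* Standalone check (not from the commit): darknet gradient helpers take
 * the layer's *output*, so loggy_gradient(out) should match the analytic
 * derivative of loggy_activate at the corresponding input. */
#include <math.h>
#include <stdio.h>

static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}
static inline float loggy_gradient(float x)
{
    float y = (x+1.)/2.;
    return 2*(1-y)*y;
}

int main()
{
    float x;
    for(x = -4; x <= 4; x += 2){
        float s = 1./(1. + exp(-x));   /* sigmoid(x) */
        float analytic = 2*s*(1-s);    /* d/dx of 2*sigmoid(x) - 1 */
        printf("x=%+.1f analytic=%f gradient(out)=%f\n",
                x, analytic, loggy_gradient(loggy_activate(x)));
    }
    return 0;
}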
@@ -206,7 +206,6 @@ int main(int argc, char **argv)
     gpu_index = find_int_arg(argc, argv, "-i", 0);
     if(find_arg(argc, argv, "-nogpu")) {
         gpu_index = -1;
-        printf("nogpu\n");
     }
 
 #ifndef GPU
@@ -34,6 +34,7 @@ struct layer{
     ACTIVATION activation;
     COST_TYPE cost_type;
     int batch_normalize;
+    int shortcut;
     int batch;
     int forced;
     int flipped;
@@ -13,7 +13,7 @@ float abs_mean(float *x, int n)
     int i;
     float sum = 0;
     for (i = 0; i < n; ++i){
-        sum += abs(x[i]);
+        sum += fabs(x[i]);
     }
     return sum/n;
 }
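The abs_mean fix matters because abs() is the integer absolute value: passing a float converts it to int first, silently discarding the fractional part, while fabs() from math.h keeps it. A quick illustration:

/* Why abs -> fabs matters: abs() takes an int, so a float argument is
 * truncated toward zero before the sign is dropped. */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

int main()
{
    float v = -0.75f;
    printf("abs:  %d\n", abs((int)v));   /* prints 0 -- fraction lost */
    printf("fabs: %f\n", fabs(v));       /* prints 0.750000 */
    return 0;
}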
@@ -176,8 +176,11 @@ layer parse_rnn(list *options, size_params params)
     char *activation_s = option_find_str(options, "activation", "logistic");
     ACTIVATION activation = get_activation(activation_s);
     int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+    int logistic = option_find_int_quiet(options, "logistic", 0);
 
-    layer l = make_rnn_layer(params.batch, params.inputs, hidden, output, params.time_steps, activation, batch_normalize);
+    layer l = make_rnn_layer(params.batch, params.inputs, hidden, output, params.time_steps, activation, batch_normalize, logistic);
+
+    l.shortcut = option_find_int_quiet(options, "shortcut", 0);
 
     return l;
 }
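parse_rnn now reads two extra keys from the [rnn] section: logistic (default 0; make_rnn_layer, shown below in src/rnn_layer.c, maps 1 to LOGISTIC and 2 to LOGGY for the recurrent self layer) and shortcut. A hedged cfg sketch; only activation, logistic, and shortcut are confirmed by this diff, and the values are illustrative:

[rnn]
# keys confirmed by this commit; values are illustrative
activation=logistic
# 1 forces LOGISTIC, 2 forces LOGGY on the recurrent (self) layer
logistic=2
shortcut=0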
src/rnn.c (52 changed lines)
@@ -19,6 +19,12 @@ float_pair get_rnn_data(char *text, int len, int batch, int steps)
     int i,j;
     for(i = 0; i < batch; ++i){
         int index = rand() %(len - steps - 1);
+        int done = 1;
+        while(!done){
+            index = rand() %(len - steps - 1);
+            while(index < len-steps-1 && text[index++] != '\n');
+            if (index < len-steps-1) done = 1;
+        }
         for(j = 0; j < steps; ++j){
             x[(j*batch + i)*256 + text[index + j]] = 1;
             y[(j*batch + i)*256 + text[index + j + 1]] = 1;
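The new block appears intended to redraw index until the training window starts just past a newline, so each sample begins at a line boundary; note that done is initialized to 1, so as committed the loop body never runs and the first draw is kept. A hypothetical sketch of that apparent intent (the helper name and the done = 0 initialization are not from the commit):

/* Hypothetical sketch (not from the commit): redraw until the window
 * starts right after a '\n'; done starts at 0 so the loop actually runs. */
#include <stdio.h>
#include <stdlib.h>

static int sample_line_start(const char *text, int len, int steps)
{
    int index = rand() % (len - steps - 1);
    int done = 0;
    while(!done){
        index = rand() % (len - steps - 1);
        /* advance to the character just past the next newline */
        while(index < len-steps-1 && text[index++] != '\n');
        if (index < len-steps-1) done = 1;
    }
    return index;
}

int main()
{
    const char *text = "first line\nsecond line\nthird line\nfourth line\n";
    int start = sample_line_start(text, 46, 4);
    printf("window starts at %d: '%c'\n", start, text[start]);
    return 0;
}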
@@ -48,13 +54,13 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename)
     srand(time(0));
     data_seed = time(0);
     char *base = basecfg(cfgfile);
-    printf("%s\n", base);
+    fprintf(stderr, "%s\n", base);
     float avg_loss = -1;
     network net = parse_network_cfg(cfgfile);
     if(weightfile){
         load_weights(&net, weightfile);
     }
-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
     int batch = net.batch;
     int steps = net.time_steps;
     int i = (*net.seen)/net.batch;
@@ -71,7 +77,7 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename)
         if (avg_loss < 0) avg_loss = loss;
         avg_loss = avg_loss*.9 + loss*.1;
 
-        printf("%d: %f, %f avg, %f rate, %lf seconds\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time));
+        fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time));
         if(i%100==0){
             char buff[256];
             sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
@@ -92,7 +98,7 @@ void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float t
 {
     srand(rseed);
     char *base = basecfg(cfgfile);
-    printf("%s\n", base);
+    fprintf(stderr, "%s\n", base);
 
     network net = parse_network_cfg(cfgfile);
     if(weightfile){
@@ -128,6 +134,43 @@ void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float t
     printf("\n");
 }
 
+void valid_char_rnn(char *cfgfile, char *weightfile, char *filename)
+{
+    FILE *fp = fopen(filename, "r");
+    //FILE *fp = fopen("data/ab.txt", "r");
+    //FILE *fp = fopen("data/grrm/asoiaf.txt", "r");
+
+    fseek(fp, 0, SEEK_END);
+    size_t size = ftell(fp);
+    fseek(fp, 0, SEEK_SET);
+
+    char *text = calloc(size, sizeof(char));
+    fread(text, 1, size, fp);
+    fclose(fp);
+
+    char *base = basecfg(cfgfile);
+    fprintf(stderr, "%s\n", base);
+
+    network net = parse_network_cfg(cfgfile);
+    if(weightfile){
+        load_weights(&net, weightfile);
+    }
+
+    int i;
+    char c;
+    float *input = calloc(256, sizeof(float));
+    float sum = 0;
+    for(i = 0; i < size-1; ++i){
+        c = text[i];
+        input[(int)c] = 1;
+        float *out = network_predict(net, input);
+        input[(int)c] = 0;
+        sum += log(out[(int)text[i+1]]);
+    }
+    printf("Log Probability: %f\n", sum);
+}
+
+
 void run_char_rnn(int argc, char **argv)
 {
     if(argc < 4){
@@ -143,5 +186,6 @@ void run_char_rnn(int argc, char **argv)
     char *cfg = argv[3];
     char *weights = (argc > 4) ? argv[4] : 0;
     if(0==strcmp(argv[2], "train")) train_char_rnn(cfg, weights, filename);
+    else if(0==strcmp(argv[2], "valid")) valid_char_rnn(cfg, weights, filename);
     else if(0==strcmp(argv[2], "test")) test_char_rnn(cfg, weights, len, seed, temp, rseed);
 }
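The new valid subcommand sums log p(text[i+1]) under the model over the whole file, so the printed "Log Probability" divided by the number of predictions gives the average log likelihood in nats, and exponentiating its negation gives per-character perplexity. A small sketch of that conversion, with hypothetical numbers:

/* Sketch (not in the commit): turning the summed log probability printed
 * by valid_char_rnn into per-character perplexity. Numbers are hypothetical. */
#include <math.h>
#include <stdio.h>

int main()
{
    double sum = -23104.7;            /* hypothetical "Log Probability" output */
    long predictions = 9999;          /* valid_char_rnn predicts size-1 characters */
    double nll = -sum / predictions;  /* average negative log likelihood, in nats */
    printf("perplexity: %f\n", exp(nll));
    return 0;
}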
@@ -11,9 +11,9 @@
 #include <string.h>
 
 
-layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize)
+layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log)
 {
-    printf("%d %d\n", batch, steps);
+    fprintf(stderr, "RNN Layer: %d inputs, %d outputs\n", inputs, outputs);
     batch = batch / steps;
     layer l = {0};
     l.batch = batch;
@@ -25,14 +25,17 @@ layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps,
     l.state = calloc(batch*hidden, sizeof(float));
 
     l.input_layer = malloc(sizeof(layer));
+    fprintf(stderr, "\t\t");
     *(l.input_layer) = make_connected_layer(batch*steps, inputs, hidden, activation, batch_normalize);
     l.input_layer->batch = batch;
 
     l.self_layer = malloc(sizeof(layer));
-    *(l.self_layer) = make_connected_layer(batch*steps, hidden, hidden, activation, batch_normalize);
+    fprintf(stderr, "\t\t");
+    *(l.self_layer) = make_connected_layer(batch*steps, hidden, hidden, (log==2)?LOGGY:(log==1?LOGISTIC:activation), batch_normalize);
     l.self_layer->batch = batch;
 
     l.output_layer = malloc(sizeof(layer));
+    fprintf(stderr, "\t\t");
     *(l.output_layer) = make_connected_layer(batch*steps, hidden, outputs, activation, batch_normalize);
     l.output_layer->batch = batch;
 
@@ -46,7 +49,6 @@ layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps,
     l.delta_gpu = l.output_layer->delta_gpu;
 #endif
 
-    fprintf(stderr, "RNN Layer: %d inputs, %d outputs\n", inputs, outputs);
     return l;
 }
 
@@ -6,7 +6,7 @@
 #include "layer.h"
 #include "network.h"
 
-layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize);
+layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log);
 
 void forward_rnn_layer(layer l, network_state state);
 void backward_rnn_layer(layer l, network_state state);