diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu
index b82fefad..29154824 100644
--- a/src/activation_kernels.cu
+++ b/src/activation_kernels.cu
@@ -28,26 +28,26 @@ __device__ float hardtan_activate_kernel(float x)
     return x;
 }
 __device__ float linear_activate_kernel(float x){return x;}
-__device__ float logistic_activate_kernel(float x){return 1./(1. + exp(-x));}
-__device__ float loggy_activate_kernel(float x){return 2./(1. + exp(-x)) - 1;}
+__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));}
+__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;}
 __device__ float relu_activate_kernel(float x){return x*(x>0);}
-__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
+__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);}
 __device__ float selu_activate_kernel(float x) { return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x) - 1); }
-__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01*x;}
-__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;}
-__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1*x;}
-__device__ float tanh_activate_kernel(float x){return (2/(1 + exp(-2*x)) - 1);}
+__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;}
+__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;}
+__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;}
+__device__ float tanh_activate_kernel(float x){return (2/(1 + expf(-2*x)) - 1);}
 __device__ float plse_activate_kernel(float x)
 {
-    if(x < -4) return .01 * (x + 4);
-    if(x > 4) return .01 * (x - 4) + 1;
-    return .125*x + .5;
+    if(x < -4) return .01f * (x + 4);
+    if(x > 4) return .01f * (x - 4) + 1;
+    return .125f*x + .5f;
 }
 __device__ float stair_activate_kernel(float x)
 {
-    int n = floor(x);
-    if (n%2 == 0) return floor(x/2.);
-    else return (x - n) + floor(x/2.);
+    int n = floorf(x);
+    if (n%2 == 0) return floorf(x/2.f);
+    else return (x - n) + floorf(x/2.f);
 }
@@ -60,17 +60,17 @@ __device__ float linear_gradient_kernel(float x){return 1;}
 __device__ float logistic_gradient_kernel(float x){return (1-x)*x;}
 __device__ float loggy_gradient_kernel(float x)
 {
-    float y = (x+1.)/2.;
+    float y = (x+1.F)/2.F;
     return 2*(1-y)*y;
 }
 __device__ float relu_gradient_kernel(float x){return (x>0);}
 __device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);}
-__device__ float selu_gradient_kernel(float x) { return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732); }
-__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01;}
-__device__ float ramp_gradient_kernel(float x){return (x>0)+.1;}
-__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1;}
+__device__ float selu_gradient_kernel(float x) { return (x >= 0)*1.0507f + (x < 0)*(x + 1.0507f*1.6732f); }
+__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;}
+__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;}
+__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;}
 __device__ float tanh_gradient_kernel(float x){return 1-x*x;}
-__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? .01 : .125;}
+__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? .01f : .125f;}
 __device__ float stair_gradient_kernel(float x)
 {
     if (floor(x) == x) return 0;
diff --git a/src/activations.h b/src/activations.h
index 90930d44..442f15a2 100644
--- a/src/activations.h
+++ b/src/activations.h
@@ -22,9 +22,9 @@ void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta);
 
 static inline float stair_activate(float x)
 {
-    int n = floor(x);
-    if (n%2 == 0) return floor(x/2.);
-    else return (x - n) + floor(x/2.);
+    int n = floorf(x);
+    if (n%2 == 0) return floorf(x/2.f);
+    else return (x - n) + floorf(x/2.f);
 }
 static inline float hardtan_activate(float x)
 {
@@ -33,32 +33,32 @@ static inline float hardtan_activate(float x)
     return x;
 }
 static inline float linear_activate(float x){return x;}
-static inline float logistic_activate(float x){return 1./(1. + exp(-x));}
-static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}
+static inline float logistic_activate(float x){return 1.f/(1.f + expf(-x));}
+static inline float loggy_activate(float x){return 2.f/(1.f + expf(-x)) - 1;}
 static inline float relu_activate(float x){return x*(x>0);}
-static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
-static inline float selu_activate(float x) { return (x >= 0)*1.0507*x + (x < 0)*1.0507*1.6732*(exp(x) - 1); }
-static inline float relie_activate(float x){return (x>0) ? x : .01*x;}
-static inline float ramp_activate(float x){return x*(x>0)+.1*x;}
-static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}
-static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
+static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);}
+static inline float selu_activate(float x) { return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x) - 1); }
+static inline float relie_activate(float x){return (x>0) ? x : .01f*x;}
+static inline float ramp_activate(float x){return x*(x>0)+.1f*x;}
+static inline float leaky_activate(float x){return (x>0) ? x : .1f*x;}
+static inline float tanh_activate(float x){return (expf(2*x)-1)/(expf(2*x)+1);}
 static inline float plse_activate(float x)
 {
-    if(x < -4) return .01 * (x + 4);
-    if(x > 4) return .01 * (x - 4) + 1;
-    return .125*x + .5;
+    if(x < -4) return .01f * (x + 4);
+    if(x > 4) return .01f * (x - 4) + 1;
+    return .125f*x + .5f;
 }
 static inline float lhtan_activate(float x)
 {
-    if(x < 0) return .001*x;
-    if(x > 1) return .001*(x-1) + 1;
+    if(x < 0) return .001f*x;
+    if(x > 1) return .001f*(x-1) + 1;
     return x;
 }
 static inline float lhtan_gradient(float x)
 {
     if(x > 0 && x < 1) return 1;
-    return .001;
+    return .001f;
 }
 
 static inline float hardtan_gradient(float x)
@@ -70,7 +70,7 @@ static inline float linear_gradient(float x){return 1;}
 static inline float logistic_gradient(float x){return (1-x)*x;}
 static inline float loggy_gradient(float x)
 {
-    float y = (x+1.)/2.;
+    float y = (x+1.f)/2.f;
     return 2*(1-y)*y;
 }
 static inline float stair_gradient(float x)
@@ -80,12 +80,12 @@ static inline float stair_gradient(float x)
 }
 static inline float relu_gradient(float x){return (x>0);}
 static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);}
-static inline float selu_gradient(float x) { return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732); }
-static inline float relie_gradient(float x){return (x>0) ? 1 : .01;}
-static inline float ramp_gradient(float x){return (x>0)+.1;}
-static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;}
+static inline float selu_gradient(float x) { return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507f*1.6732f); }
+static inline float relie_gradient(float x){return (x>0) ? 1 : .01f;}
+static inline float ramp_gradient(float x){return (x>0)+.1f;}
+static inline float leaky_gradient(float x){return (x>0) ? 1 : .1f;}
 static inline float tanh_gradient(float x){return 1-x*x;}
-static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01 : .125;}
+static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01f : .125f;}
 
 #endif
diff --git a/src/cuda.c b/src/cuda.c
index 77f4dd71..a8ba37fa 100644
--- a/src/cuda.c
+++ b/src/cuda.c
@@ -54,7 +54,7 @@ void check_error(cudaError_t status)
     }
 }
 
-void check_error_extended(cudaError_t status, char *file, int line, char *date_time)
+void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time)
 {
     if (status != cudaSuccess)
         printf("CUDA Error: file: %s() : line: %d : build time: %s \n", file, line, date_time);
@@ -165,7 +165,7 @@ void cudnn_check_error(cudnnStatus_t status)
     }
 }
 
-void cudnn_check_error_extended(cudnnStatus_t status, char *file, int line, char *date_time)
+void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line, const char *date_time)
 {
     if (status != cudaSuccess)
         printf("\n cuDNN Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time);
diff --git a/src/cuda.h b/src/cuda.h
index ecc83e40..34f68eba 100644
--- a/src/cuda.h
+++ b/src/cuda.h
@@ -44,7 +44,7 @@ extern int gpu_index;
 extern "C" {
 #endif // __cplusplus
     void check_error(cudaError_t status);
-    void check_error_extended(cudaError_t status, char *file, int line, char *date_time);
+    void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time);
 #define CHECK_CUDA(X) check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ );
 
     cublasHandle_t blas_handle();
@@ -70,7 +70,7 @@ extern "C" {
 cudnnHandle_t cudnn_handle();
 enum {cudnn_fastest, cudnn_smallest};
 
-void cudnn_check_error_extended(cudnnStatus_t status, char *file, int line, char *date_time);
+void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line, const char *date_time);
 #define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ );
 
 #endif
diff --git a/src/parser.c b/src/parser.c
index b2101703..671ed764 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -848,7 +848,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
     free_list(sections);
     net.outputs = get_network_output_size(net);
     net.output = get_network_output(net);
-    printf("Total BFLOPS %5.3f \n", bflops);
+    fprintf(stderr, "Total BFLOPS %5.3f \n", bflops);
 #ifdef GPU
     get_cuda_stream();
     get_cuda_memcpy_stream();
@@ -870,7 +870,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
             check_error(cudaMalloc((void **)net.output16_gpu, *net.max_output16_size * sizeof(short))); //sizeof(half)
         }
         if (workspace_size) {
-            printf(" Allocate additional workspace_size = %1.2f MB \n", (float)workspace_size/1000000);
+            fprintf(stderr, " Allocate additional workspace_size = %1.2f MB \n", (float)workspace_size/1000000);
             net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1);
         }
         else {