mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Maybe some stuff changed
This commit is contained in:
parent
14303717dc
commit
5c9a773bb6
53
src/cnn.c
53
src/cnn.c
@ -957,59 +957,8 @@ void test_distribution()
|
|||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
test_gpu_blas();
|
|
||||||
//test_blas();
|
|
||||||
//train_assira();
|
|
||||||
//test_visualize();
|
|
||||||
//test_distribution();
|
|
||||||
//feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
|
|
||||||
//train_imagenet();
|
|
||||||
//test_imagenet();
|
|
||||||
|
|
||||||
//test_blas();
|
|
||||||
//test_visualize();
|
|
||||||
//test_gpu_blas();
|
//test_gpu_blas();
|
||||||
//test_blas();
|
train_imagenet();
|
||||||
//test_convolve_matrix();
|
|
||||||
// test_im2row();
|
|
||||||
//test_split();
|
|
||||||
//test_ensemble();
|
|
||||||
//test_nist_single();
|
|
||||||
//test_nist();
|
|
||||||
//train_nist();
|
|
||||||
//test_convolutional_layer();
|
|
||||||
//test_col2im();
|
|
||||||
//test_cifar10();
|
|
||||||
//train_cifar10();
|
|
||||||
//test_vince();
|
|
||||||
//test_full();
|
|
||||||
//tune_VOC();
|
|
||||||
//features_VOC_image(argv[1], argv[2], argv[3], 0);
|
|
||||||
//features_VOC_image(argv[1], argv[2], argv[3], 1);
|
|
||||||
//train_VOC();
|
|
||||||
//features_VOC_image(argv[1], argv[2], argv[3], 0, 4);
|
|
||||||
//features_VOC_image(argv[1], argv[2], argv[3], 1, 4);
|
|
||||||
//features_VOC_image_size(argv[1], atoi(argv[2]), atoi(argv[3]));
|
|
||||||
//visualize_imagenet_features("data/assira/train.list");
|
|
||||||
//visualize_imagenet_topk("data/VOC2012.list");
|
|
||||||
//visualize_cat();
|
|
||||||
//flip_network();
|
|
||||||
//test_visualize();
|
|
||||||
//test_parser();
|
|
||||||
fprintf(stderr, "Success!\n");
|
fprintf(stderr, "Success!\n");
|
||||||
//test_random_preprocess();
|
|
||||||
//test_random_classify();
|
|
||||||
//test_parser();
|
|
||||||
//test_backpropagate();
|
|
||||||
//test_ann();
|
|
||||||
//test_convolve();
|
|
||||||
//test_upsample();
|
|
||||||
//test_rotate();
|
|
||||||
//test_load();
|
|
||||||
//test_network();
|
|
||||||
//test_convolutional_layer();
|
|
||||||
//verify_convolutional_layer();
|
|
||||||
//test_color();
|
|
||||||
//cvWaitKey(0);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -342,7 +342,7 @@ void bias_output_gpu(const convolutional_layer layer)
|
|||||||
check_error(cl);
|
check_error(cl);
|
||||||
}
|
}
|
||||||
|
|
||||||
//#define TIMEIT
|
#define TIMEIT
|
||||||
|
|
||||||
void forward_convolutional_layer_gpu(convolutional_layer layer, cl_mem in)
|
void forward_convolutional_layer_gpu(convolutional_layer layer, cl_mem in)
|
||||||
{
|
{
|
||||||
|
12
src/gemm.c
12
src/gemm.c
@ -176,14 +176,12 @@ void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA,
|
|||||||
float BETA,
|
float BETA,
|
||||||
cl_mem C_gpu, int ldc)
|
cl_mem C_gpu, int ldc)
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
cl_setup();
|
cl_setup();
|
||||||
cl_command_queue queue = cl.queue;
|
cl_command_queue queue = cl.queue;
|
||||||
cl_event event;
|
cl_event event;
|
||||||
cl.error = clblasSgemm(clblasRowMajor, TA?clblasTrans:clblasNoTrans, TB?clblasTrans:clblasNoTrans,M, N, K,ALPHA, A_gpu, 0, lda,B_gpu, 0, ldb,BETA, C_gpu, 0, ldc,1, &queue, 0, NULL, &event);
|
cl.error = clblasSgemm(clblasRowMajor, TA?clblasTrans:clblasNoTrans, TB?clblasTrans:clblasNoTrans,M, N, K,ALPHA, A_gpu, 0, lda,B_gpu, 0, ldb,BETA, C_gpu, 0, ldc,1, &queue, 0, NULL, &event);
|
||||||
*/
|
|
||||||
|
|
||||||
gemm_ongpu_new(TA, TB, M, N, K, ALPHA, A_gpu, lda, B_gpu, ldb, BETA, C_gpu, ldc);
|
//gemm_ongpu_new(TA, TB, M, N, K, ALPHA, A_gpu, lda, B_gpu, ldb, BETA, C_gpu, ldc);
|
||||||
}
|
}
|
||||||
|
|
||||||
void gemm_ongpu_new(int TA, int TB, int M, int N, int K, float ALPHA,
|
void gemm_ongpu_new(int TA, int TB, int M, int N, int K, float ALPHA,
|
||||||
@ -327,7 +325,7 @@ void time_gpu_random_matrix(int TA, int TB, int m, int k, int n)
|
|||||||
|
|
||||||
void time_ongpu(int TA, int TB, int m, int k, int n)
|
void time_ongpu(int TA, int TB, int m, int k, int n)
|
||||||
{
|
{
|
||||||
int iter = 100;
|
int iter = 128;
|
||||||
float *a = random_matrix(m,k);
|
float *a = random_matrix(m,k);
|
||||||
float *b = random_matrix(k,n);
|
float *b = random_matrix(k,n);
|
||||||
|
|
||||||
@ -345,10 +343,10 @@ void time_ongpu(int TA, int TB, int m, int k, int n)
|
|||||||
for(i = 0; i<iter; ++i){
|
for(i = 0; i<iter; ++i){
|
||||||
gemm_ongpu(TA,TB,m,n,k,1,a_cl,lda,b_cl,ldb,1,c_cl,n);
|
gemm_ongpu(TA,TB,m,n,k,1,a_cl,lda,b_cl,ldb,1,c_cl,n);
|
||||||
}
|
}
|
||||||
int flop = m*n*(2*k+3)*iter;
|
double flop = m*n*(2.*k+3.)*iter;
|
||||||
float gflop = flop/pow(10., 9);
|
double gflop = flop/pow(10., 9);
|
||||||
end = clock();
|
end = clock();
|
||||||
float seconds = sec(end-start);
|
double seconds = sec(end-start);
|
||||||
printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s, %lf GFLOPS\n",m,k,k,n, TA, TB, seconds, gflop/seconds);
|
printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s, %lf GFLOPS\n",m,k,k,n, TA, TB, seconds, gflop/seconds);
|
||||||
clReleaseMemObject(a_cl);
|
clReleaseMemObject(a_cl);
|
||||||
clReleaseMemObject(b_cl);
|
clReleaseMemObject(b_cl);
|
||||||
|
@ -38,6 +38,7 @@ void forward_network_gpu(network net, cl_mem input, cl_mem truth, int train)
|
|||||||
//printf("start\n");
|
//printf("start\n");
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < net.n; ++i){
|
for(i = 0; i < net.n; ++i){
|
||||||
|
clock_t time = clock();
|
||||||
if(net.types[i] == CONVOLUTIONAL){
|
if(net.types[i] == CONVOLUTIONAL){
|
||||||
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
|
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
|
||||||
forward_convolutional_layer_gpu(layer, input);
|
forward_convolutional_layer_gpu(layer, input);
|
||||||
@ -62,6 +63,7 @@ void forward_network_gpu(network net, cl_mem input, cl_mem truth, int train)
|
|||||||
forward_softmax_layer_gpu(layer, input);
|
forward_softmax_layer_gpu(layer, input);
|
||||||
input = layer.output_cl;
|
input = layer.output_cl;
|
||||||
}
|
}
|
||||||
|
printf("%d %f\n", i, sec(clock()-time));
|
||||||
/*
|
/*
|
||||||
else if(net.types[i] == CROP){
|
else if(net.types[i] == CROP){
|
||||||
crop_layer layer = *(crop_layer *)net.layers[i];
|
crop_layer layer = *(crop_layer *)net.layers[i];
|
||||||
|
Loading…
x
Reference in New Issue
Block a user