mirror of https://github.com/pjreddie/darknet.git
Maybe something changed?
commit edbccdfcaf (parent 5c9a773bb6)
@@ -499,7 +499,7 @@ void train_nist()
     int iters = 10000/net.batch;
     while(++count <= 2000){
         clock_t start = clock(), end;
-        float loss = train_network_sgd(net, train, iters);
+        float loss = train_network_sgd_gpu(net, train, iters);
         end = clock();
         float test_acc = network_accuracy(net, test);
         //float test_acc = 0;
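
Note: train_nist() now routes its mini-batch updates through the GPU path. A minimal sketch of what such a wrapper could look like, assuming darknet's network/data types, an illustrative get_random_batch() helper, and the train_network_datum_gpu() touched further down in this commit; this is a hedged sketch, not the repository's actual implementation:

    #include <stdlib.h>
    /* Sketch: average the loss of n random mini-batches trained on the GPU. */
    float train_network_sgd_gpu(network net, data d, int n)
    {
        int batch = net.batch;
        float *X = calloc(batch*d.X.cols, sizeof(float));   /* host staging buffers */
        float *y = calloc(batch*d.y.cols, sizeof(float));
        float sum = 0;
        int i;
        for(i = 0; i < n; ++i){
            get_random_batch(d, batch, X, y);                /* illustrative helper */
            sum += train_network_datum_gpu(net, X, y);       /* forward/backward/update on GPU */
        }
        free(X);
        free(y);
        return sum/(n*batch);
    }
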
@@ -957,8 +957,9 @@ void test_distribution()
 
 int main(int argc, char *argv[])
 {
-    //test_gpu_blas();
-    train_imagenet();
+    test_gpu_blas();
+    //train_imagenet();
+    //train_nist();
     fprintf(stderr, "Success!\n");
     return 0;
 }
@@ -23,11 +23,11 @@ __kernel void col2im(__global float *data_col, int batch,
 
     int w_start = (w<ksize)?0:(w-ksize)/stride + 1;
     int w_end = w/stride + 1;
-    if(width_col < w_end) w_end = width_col;
+    w_end = (width_col < w_end) ? width_col : w_end;
 
     int h_start = (h<ksize)?0:(h-ksize)/stride+1;
     int h_end = h/stride + 1;
-    if(height_col < h_end) h_end = height_col;
+    h_end = (height_col < h_end) ? height_col : h_end;
 
     int rows = channels * ksize * ksize;
     int cols = height_col*width_col;
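
Note: both col2im changes replace an if-based clamp with a branch-free ternary; the two forms compute the same min(end, limit). A tiny stand-alone check of that equivalence (illustrative, not part of darknet):

    #include <assert.h>
    static int clamp_if(int end, int limit)      { if(limit < end) end = limit; return end; }
    static int clamp_ternary(int end, int limit) { return (limit < end) ? limit : end; }
    int main(void)
    {
        int end, limit;
        for(end = 0; end < 8; ++end)
            for(limit = 0; limit < 8; ++limit)
                assert(clamp_if(end, limit) == clamp_ternary(end, limit));
        return 0;
    }
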
@@ -342,7 +342,7 @@ void bias_output_gpu(const convolutional_layer layer)
     check_error(cl);
 }
 
-#define TIMEIT
+//#define TIMEIT
 
 void forward_convolutional_layer_gpu(convolutional_layer layer, cl_mem in)
 {
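
Note: commenting out #define TIMEIT disables timing code that is presumably guarded by #ifdef TIMEIT inside the GPU convolution forward pass. A minimal sketch of that pattern, with illustrative names:

    #include <stdio.h>
    #include <time.h>
    /* #define TIMEIT */                       /* flip this to re-enable the timers */
    void forward_example(void)
    {
    #ifdef TIMEIT
        clock_t start = clock();               /* compiled only when TIMEIT is defined */
    #endif
        /* ... enqueue im2col / gemm / bias / activation kernels ... */
    #ifdef TIMEIT
        printf("forward: %f s\n", (double)(clock() - start)/CLOCKS_PER_SEC);
    #endif
    }
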
@@ -176,12 +176,14 @@ void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA,
         float BETA,
         cl_mem C_gpu, int ldc)
 {
+    /*
     cl_setup();
     cl_command_queue queue = cl.queue;
     cl_event event;
     cl.error = clblasSgemm(clblasRowMajor, TA?clblasTrans:clblasNoTrans, TB?clblasTrans:clblasNoTrans,M, N, K,ALPHA, A_gpu, 0, lda,B_gpu, 0, ldb,BETA, C_gpu, 0, ldc,1, &queue, 0, NULL, &event);
 
-    //gemm_ongpu_new(TA, TB, M, N, K, ALPHA, A_gpu, lda, B_gpu, ldb, BETA, C_gpu, ldc);
+    */
+    gemm_ongpu_new(TA, TB, M, N, K, ALPHA, A_gpu, lda, B_gpu, ldb, BETA, C_gpu, ldc);
 }
 
 void gemm_ongpu_new(int TA, int TB, int M, int N, int K, float ALPHA,
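
Note: gemm_ongpu() now always dispatches to the hand-written gemm_ongpu_new() OpenCL kernel instead of clblasSgemm(); both are meant to compute the usual SGEMM, C = ALPHA*op(A)*op(B) + BETA*C with optional transposition of A and B. A plain-C reference for the non-transposed case, useful for checking either GPU path (illustrative, not darknet's kernel):

    /* Reference SGEMM for TA == 0, TB == 0.
       A is MxK with leading dimension lda, B is KxN (ldb), C is MxN (ldc). */
    void gemm_nn_ref(int M, int N, int K, float ALPHA,
                     const float *A, int lda,
                     const float *B, int ldb,
                     float BETA, float *C, int ldc)
    {
        int i, j, k;
        for(i = 0; i < M; ++i){
            for(j = 0; j < N; ++j){
                float sum = 0;
                for(k = 0; k < K; ++k) sum += A[i*lda + k] * B[k*ldb + j];
                C[i*ldc + j] = ALPHA*sum + BETA*C[i*ldc + j];
            }
        }
    }
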
@@ -91,12 +91,10 @@ void im2col_ongpu(cl_mem data_im, int batch,
         width_col = 1 + (width-1) / stride;
     }
 
-    size_t global_size[2];
-    global_size[0] = batch*channels_col;
-    global_size[1] = height_col*width_col;
+    size_t global_size = batch*channels_col*height_col*width_col;
 
-    clEnqueueNDRangeKernel(queue, im2col_kernel, 2, 0,
-            global_size, 0, 0, 0, 0);
+    clEnqueueNDRangeKernel(queue, im2col_kernel, 1, 0,
+            &global_size, 0, 0, 0, 0);
     check_error(cl);
 }
 
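
Note: the im2col launch drops from two work dimensions to one, so global_size becomes a single size_t (one work-item per output element, batch*channels_col*height_col*width_col in total) and its address is passed as the global_work_size pointer. With the standard OpenCL signature, the call has this shape (error handling as in darknet's check_error(cl)):

    /* clEnqueueNDRangeKernel(queue, kernel, work_dim, global_work_offset,
       global_work_size, local_work_size, num_events, wait_list, event);
       work_dim == 1, so global_work_size points at one size_t. */
    size_t global_size = (size_t)batch * channels_col * height_col * width_col;
    cl_int err = clEnqueueNDRangeKernel(queue, im2col_kernel, 1, NULL,
                                        &global_size, NULL, 0, NULL, NULL);
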
@@ -16,21 +16,22 @@ __kernel void im2col(__global float *data_im, int batch,
     int c,h,w,b;
     int height_col = (height - ksize) / stride + 1;
     int width_col = (width - ksize) / stride + 1;
+    int channels_col = channels * ksize * ksize;
     if (pad){
         height_col = 1 + (height-1) / stride;
         width_col = 1 + (width-1) / stride;
         pad = ksize/2;
     }
-    int gid1 = get_global_id(0);
-    b = gid1%batch;
-    c = gid1/batch;
+    int id = get_global_id(0);
+    w = id % width_col;
+    id /= width_col;
+    h = id % height_col;
+    id /= height_col;
+    c = id % channels_col;
+    id /= channels_col;
+    b = id % batch;
+    id /= batch;
 
-    int gid2 = get_global_id(1);
-    h = gid2%height_col;
-    w = gid2/height_col;
-
-
-    int channels_col = channels * ksize * ksize;
     int col_size = height_col*width_col*channels_col;
     int w_offset = c % ksize;
     int h_offset = (c / ksize) % ksize;
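
Note: the im2col kernel now recovers (w, h, c, b) from a single flattened global id by repeated mod/div, matching the 1-D launch above: w varies fastest, then h, then c (over channels_col = channels*ksize*ksize), then b. A small host-side check of that decomposition (illustrative):

    #include <assert.h>
    int main(void)
    {
        int width_col = 3, height_col = 2, channels_col = 4, batch = 2;
        int b, c, h, w, id = 0;
        for(b = 0; b < batch; ++b)
        for(c = 0; c < channels_col; ++c)
        for(h = 0; h < height_col; ++h)
        for(w = 0; w < width_col; ++w, ++id){
            int t = id;
            int w2 = t % width_col;    t /= width_col;
            int h2 = t % height_col;   t /= height_col;
            int c2 = t % channels_col; t /= channels_col;
            int b2 = t % batch;
            assert(w2 == w && h2 == h && c2 == c && b2 == b);
        }
        return 0;
    }
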
@@ -38,7 +38,7 @@ void forward_network_gpu(network net, cl_mem input, cl_mem truth, int train)
     //printf("start\n");
     int i;
     for(i = 0; i < net.n; ++i){
-        clock_t time = clock();
+        //clock_t time = clock();
         if(net.types[i] == CONVOLUTIONAL){
             convolutional_layer layer = *(convolutional_layer *)net.layers[i];
             forward_convolutional_layer_gpu(layer, input);
@@ -63,7 +63,7 @@ void forward_network_gpu(network net, cl_mem input, cl_mem truth, int train)
             forward_softmax_layer_gpu(layer, input);
             input = layer.output_cl;
         }
-        printf("%d %f\n", i, sec(clock()-time));
+        //printf("%d %f\n", i, sec(clock()-time));
         /*
         else if(net.types[i] == CROP){
             crop_layer layer = *(crop_layer *)net.layers[i];
@@ -386,6 +386,7 @@ float train_network_datum_gpu(network net, float *x, float *y)
 {
     int x_size = get_network_input_size(net)*net.batch;
     int y_size = get_network_output_size(net)*net.batch;
+    clock_t time = clock();
     if(!*net.input_cl){
         *net.input_cl = cl_make_array(x, x_size);
         *net.truth_cl = cl_make_array(y, y_size);
@@ -393,10 +394,18 @@ float train_network_datum_gpu(network net, float *x, float *y)
         cl_write_array(*net.input_cl, x, x_size);
         cl_write_array(*net.truth_cl, y, y_size);
     }
+    //printf("trans %f\n", sec(clock()-time));
+    time = clock();
     forward_network_gpu(net, *net.input_cl, *net.truth_cl, 1);
+    //printf("forw %f\n", sec(clock()-time));
+    time = clock();
     backward_network_gpu(net, *net.input_cl);
+    //printf("back %f\n", sec(clock()-time));
+    time = clock();
     float error = get_network_cost(net);
     update_network_gpu(net);
+    //printf("updt %f\n", sec(clock()-time));
+    time = clock();
     return error;
 }
 
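
Note: the new timing lines take clock() deltas and convert them with sec(); a minimal sketch of such a helper, assuming it simply divides by CLOCKS_PER_SEC (the in-tree definition may differ):

    #include <time.h>
    /* Convert a clock() delta to seconds. */
    float sec(clock_t clocks)
    {
        return (float)clocks/CLOCKS_PER_SEC;
    }

Since clock() measures host CPU time and the OpenCL enqueues may return before the device finishes, these intervals mostly bound submission cost unless a clFinish() is issued before each reading.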