mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
im2col actually works now
This commit is contained in:
parent
1c0fd9bb47
commit
27d0c922ea
@@ -28,7 +28,7 @@ __kernel void im2col_pad(__global float *im, int batch,
|
|||||||
int im_row = h_offset + h * stride - pad;
|
int im_row = h_offset + h * stride - pad;
|
||||||
int im_col = w_offset + w * stride - pad;
|
int im_col = w_offset + w * stride - pad;
|
||||||
|
|
||||||
int im_index = im_col + width*(im_row + height*(im_channel+batch*channels));
|
int im_index = im_col + width*(im_row + height*(im_channel+b*channels));
|
||||||
float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 0 : im[im_index];
|
float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 0 : im[im_index];
|
||||||
|
|
||||||
data_col[col_index] = val;
|
data_col[col_index] = val;
|
||||||
@@ -61,7 +61,7 @@ __kernel void im2col_nopad(__global float *im, int batch,
|
|||||||
int im_row = h_offset + h * stride;
|
int im_row = h_offset + h * stride;
|
||||||
int im_col = w_offset + w * stride;
|
int im_col = w_offset + w * stride;
|
||||||
|
|
||||||
int im_index = im_col + width*(im_row + height*(im_channel+batch*channels));
|
int im_index = im_col + width*(im_row + height*(im_channel+b*channels));
|
||||||
float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 0 : im[im_index];
|
float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 0 : im[im_index];
|
||||||
|
|
||||||
data_col[col_index] = val;
|
data_col[col_index] = val;
|
||||||
|
@@ -38,7 +38,7 @@ void forward_network_gpu(network net, cl_mem input, cl_mem truth, int train)
|
|||||||
//printf("start\n");
|
//printf("start\n");
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < net.n; ++i){
|
for(i = 0; i < net.n; ++i){
|
||||||
clock_t time = clock();
|
//clock_t time = clock();
|
||||||
if(net.types[i] == CONVOLUTIONAL){
|
if(net.types[i] == CONVOLUTIONAL){
|
||||||
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
|
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
|
||||||
forward_convolutional_layer_gpu(layer, input);
|
forward_convolutional_layer_gpu(layer, input);
|
||||||
@@ -63,7 +63,7 @@ void forward_network_gpu(network net, cl_mem input, cl_mem truth, int train)
|
|||||||
forward_softmax_layer_gpu(layer, input);
|
forward_softmax_layer_gpu(layer, input);
|
||||||
input = layer.output_cl;
|
input = layer.output_cl;
|
||||||
}
|
}
|
||||||
printf("%d %f\n", i, sec(clock()-time));
|
//printf("%d %f\n", i, sec(clock()-time));
|
||||||
/*
|
/*
|
||||||
else if(net.types[i] == CROP){
|
else if(net.types[i] == CROP){
|
||||||
crop_layer layer = *(crop_layer *)net.layers[i];
|
crop_layer layer = *(crop_layer *)net.layers[i];
|
||||||
@@ -85,7 +85,7 @@ void backward_network_gpu(network net, cl_mem input)
|
|||||||
cl_mem prev_input;
|
cl_mem prev_input;
|
||||||
cl_mem prev_delta;
|
cl_mem prev_delta;
|
||||||
for(i = net.n-1; i >= 0; --i){
|
for(i = net.n-1; i >= 0; --i){
|
||||||
clock_t time = clock();
|
//clock_t time = clock();
|
||||||
if(i == 0){
|
if(i == 0){
|
||||||
prev_input = input;
|
prev_input = input;
|
||||||
prev_delta = 0;
|
prev_delta = 0;
|
||||||
@@ -113,7 +113,7 @@ void backward_network_gpu(network net, cl_mem input)
|
|||||||
softmax_layer layer = *(softmax_layer *)net.layers[i];
|
softmax_layer layer = *(softmax_layer *)net.layers[i];
|
||||||
backward_softmax_layer_gpu(layer, prev_delta);
|
backward_softmax_layer_gpu(layer, prev_delta);
|
||||||
}
|
}
|
||||||
printf("back: %d %f\n", i, sec(clock()-time));
|
//printf("back: %d %f\n", i, sec(clock()-time));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user