Fixed input_shortcut_gpu() for layers with different numbers of channels

This commit is contained in:
AlexeyAB
2019-06-08 17:42:53 +03:00
parent 3ff27b1cdd
commit 12db38ccbf

View File

@@ -749,7 +749,9 @@ extern "C" void input_shortcut_gpu(float *in, int batch, int w1, int h1, int c1,
if (sample < 1) sample = 1;
int size = batch * minw * minh * minc;
input_shortcut_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> >(in, size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out);
//input_shortcut_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> >(in, size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out);
simple_copy_ongpu(w2 * h2 * c2 * batch, in, out);
shortcut_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> >(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out);
CHECK_CUDA(cudaPeekAtLastError());
}