Fixed input_shortcut_gpu() for layers with different numbers of channels

2023-08-10 21:13:14 +03:00 · 2019-06-08 17:42:53 +03:00
parent 3ff27b1cdd
commit 12db38ccbf
1 changed file with 3 additions and 1 deletion
--- a/src/blas_kernels.cu
+++ b/src/blas_kernels.cu
@@ -749,7 +749,9 @@ extern "C" void input_shortcut_gpu(float *in, int batch, int w1, int h1, int c1,
    if (sample < 1) sample = 1;

    int size = batch * minw * minh * minc;
-    input_shortcut_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> >(in, size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out);
+    //input_shortcut_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> >(in, size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out);
+    simple_copy_ongpu(w2 * h2 * c2 * batch, in, out);
+    shortcut_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> >(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out);
    CHECK_CUDA(cudaPeekAtLastError());
 }