diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu index 55d5e5d4..ed418c10 100644 --- a/src/blas_kernels.cu +++ b/src/blas_kernels.cu @@ -749,7 +749,9 @@ extern "C" void input_shortcut_gpu(float *in, int batch, int w1, int h1, int c1, if (sample < 1) sample = 1; int size = batch * minw * minh * minc; - input_shortcut_kernel << > >(in, size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out); + //input_shortcut_kernel << > >(in, size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out); + simple_copy_ongpu(w2 * h2 * c2 * batch, in, out); + shortcut_kernel << > >(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out); CHECK_CUDA(cudaPeekAtLastError()); }