mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Compile fix
This commit is contained in:
@ -12,6 +12,9 @@ extern "C" {
|
||||
#include <assert.h>
|
||||
#include <cuda.h>
|
||||
|
||||
+#define WARP_SIZE 32
|
||||
|
||||
|
||||
// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu
|
||||
// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE
|
||||
|
||||
@ -66,7 +69,6 @@ void im2col_ongpu(float *im,
|
||||
}
|
||||
// --------------------------------
|
||||
|
||||
-#define WARP_SIZE 32
|
||||
|
||||
__global__ void im2col_align_gpu_kernel(const int n, const float* data_im,
|
||||
const int height, const int width, const int ksize,
|
||||
@ -568,7 +570,7 @@ __global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int
|
||||
int64_t B_cur_index = (j*ldb + k) / 8;
|
||||
if (i >= M) A_cur_index = 0;
|
||||
|
||||
-#pragma unroll WARP_SIZE
|
||||
+#pragma unroll
|
||||
for (int t = 0; t < WARP_SIZE; ++t) {
|
||||
const int lane_id = threadIdx.x % WARP_SIZE;
|
||||
|
||||
@ -598,7 +600,7 @@ __global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int
|
||||
int64_t B_cur_index = (j*ldb + k) / 8;
|
||||
if (i >= M) A_cur_index = 0;
|
||||
|
||||
-#pragma unroll WARP_SIZE
|
||||
+#pragma unroll
|
||||
for (int t = 0; t < WARP_SIZE; ++t) {
|
||||
const int lane_id = threadIdx.x % WARP_SIZE;
|
||||
|
||||
@ -705,7 +707,7 @@ __global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int
|
||||
int64_t B_cur_index = (local_j*ldb + k) / 8;
|
||||
if (i >= M) A_cur_index = 0;
|
||||
|
||||
-#pragma unroll WARP_SIZE
|
||||
+#pragma unroll
|
||||
for (int t = 0; t < WARP_SIZE; ++t) {
|
||||
const int lane_id = threadIdx.x % WARP_SIZE;
|
||||
|
||||
@ -735,7 +737,7 @@ __global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int
|
||||
int64_t B_cur_index = (local_j*ldb + k) / 8;
|
||||
if (i >= M) A_cur_index = 0;
|
||||
|
||||
-#pragma unroll WARP_SIZE
|
||||
+#pragma unroll
|
||||
for (int t = 0; t < WARP_SIZE; ++t) {
|
||||
const int lane_id = threadIdx.x % WARP_SIZE;
|
||||
|
||||
|
Reference in New Issue
Block a user