mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Minor fix
This commit is contained in:
@ -285,25 +285,6 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference)
|
||||
//l->fw_algo16 = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED;
|
||||
//l->bd_algo16 = CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED;
|
||||
//l->bf_algo16 = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED;
|
||||
|
||||
int fw = 0, bd = 0, bf = 0;
|
||||
if (l->fw_algo16 == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM) fw = 1;
|
||||
//printf("Tensor Cores - Forward enabled: l->fw_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM \n");
|
||||
if (l->fw_algo16 == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED) fw = 2;
|
||||
//printf("Tensor Cores - Forward enabled: l->fw_algo = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED \n");
|
||||
|
||||
if (l->bd_algo16 == CUDNN_CONVOLUTION_BWD_DATA_ALGO_1) bd = 1;
|
||||
//printf("Tensor Cores - Backward-data enabled: l->bd_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 \n");
|
||||
if (l->bd_algo16 == CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED) bd = 2;
|
||||
//printf("Tensor Cores - Backward-data enabled: l->bd_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED \n");
|
||||
|
||||
if (l->bf_algo16 == CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1) bf = 1;
|
||||
//printf("Tensor Cores - Backward-filter enabled: l->bf_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 \n");
|
||||
if (l->bf_algo16 == CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED) bf = 2;
|
||||
//printf("Tensor Cores - Backward-filter enabled: l->bf_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED \n");
|
||||
|
||||
//if (fw == 2 && bd == 2 && bf == 2) printf("TF ");
|
||||
//else if (fw == 1 && bd == 1 && bf == 1) printf("TH ");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -91,7 +91,7 @@ cudaStream_t get_cuda_memcpy_stream() {
|
||||
cudaError_t status = cudaStreamCreate(&streamsArray2[i]);
|
||||
//cudaError_t status = cudaStreamCreateWithFlags(&streamsArray2[i], cudaStreamNonBlocking);
|
||||
if (status != cudaSuccess) {
|
||||
printf(" cudaStreamCreate Memcpy error: %d \n", status);
|
||||
printf(" cudaStreamCreate-Memcpy error: %d \n", status);
|
||||
const char *s = cudaGetErrorString(status);
|
||||
char buffer[256];
|
||||
printf("CUDA Error: %s\n", s);
|
||||
@ -180,6 +180,7 @@ int *cuda_make_int_array(size_t n)
|
||||
int *x_gpu;
|
||||
size_t size = sizeof(int)*n;
|
||||
cudaError_t status = cudaMalloc((void **)&x_gpu, size);
|
||||
if(status != cudaSuccess) fprintf(stderr, " Try to set subdivisions=64 in your cfg-file. \n");
|
||||
check_error(status);
|
||||
return x_gpu;
|
||||
}
|
||||
|
@ -1274,9 +1274,11 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
|
||||
if (!input) break;
|
||||
strtok(input, "\n");
|
||||
}
|
||||
//image im = load_image_resize(input, net.w, net.h, net.c);
|
||||
//image sized = copy_image(im);
|
||||
image im = load_image(input, 0, 0, net.c);
|
||||
int letterbox = 0;
|
||||
image sized = resize_image(im, net.w, net.h);
|
||||
int letterbox = 0;
|
||||
//image sized = letterbox_image(im, net.w, net.h); letterbox = 1;
|
||||
layer l = net.layers[net.n - 1];
|
||||
|
||||
|
@ -593,6 +593,34 @@ image image_data_augmentation(IplImage* ipl, int w, int h,
|
||||
}
|
||||
|
||||
|
||||
// Load an image file with OpenCV, resize it to w x h and convert it to a
// darknet `image` via ipl_to_image().
//
//   filename - path handed to cv::imread
//   w, h     - target size passed to cv::resize
//   c        - requested channels: 0 or 3 -> forced 3-channel colour load,
//              1 -> grayscale load; any other value is reported on stderr
//              and the file is loaded as stored (imread flag -1)
//
// Returns the converted image. If any OpenCV call throws (including
// cv::resize rejecting the empty Mat that cv::imread returns for an
// unreadable file), an error is printed and a blank image from make_image()
// is returned instead.
image load_image_resize(char *filename, int w, int h, int c)
{
    image out;
    try {
        int flag = -1;                      // -1: load the file unchanged
        if (c == 0 || c == 3) flag = 1;     // force 3-channel colour
        else if (c == 1) flag = 0;          // force grayscale
        else {
            fprintf(stderr, "OpenCV can't force load with %d channels\n", c);
        }

        cv::Mat loaded_image = cv::imread(filename, flag);

        // cv::resize allocates the destination itself, so no pre-sized Mat is needed.
        cv::Mat img;
        cv::resize(loaded_image, img, cv::Size(w, h), 0, 0, CV_INTER_LINEAR);

        // The channel swap is only defined for 3/4-channel data; on a grayscale
        // image cvtColor throws, which previously turned every c == 1 load
        // into the blank fallback image below.
        if (img.channels() >= 3) cv::cvtColor(img, img, cv::COLOR_RGB2BGR);

        IplImage tmp = img;
        out = ipl_to_image(&tmp);
    }
    catch (...) {
        fprintf(stderr, "OpenCV can't load image %s\n", filename);
        // c == 0 means "default colour" (3 channels on the success path), so do
        // not hand callers a zero-channel image in that case.
        out = make_image(w, h, c ? c : 3);
    }
    return out;
}
|
||||
|
||||
|
||||
#endif // OPENCV
|
||||
|
||||
// -----------------------------------------------------
|
||||
|
@ -25,6 +25,8 @@ int get_stream_fps_cpp(CvCapture *cap);
|
||||
image image_data_augmentation(IplImage* ipl, int w, int h,
|
||||
int pleft, int ptop, int swidth, int sheight, int flip,
|
||||
float jitter, float dhue, float dsat, float dexp);
|
||||
|
||||
image load_image_resize(char *filename, int w, int h, int c);
|
||||
#endif // OPENCV
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -1880,6 +1880,21 @@ __global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int
|
||||
}
|
||||
*/
|
||||
|
||||
// Further optimization: use a WMMA-based GEMM to take advantage of Tensor Cores
|
||||
// https://github.com/NVIDIA-developer-blog/code-samples/blob/master/posts/tensor-cores/simpleTensorCoreGEMM.cu
|
||||
// https://github.com/NVIDIA/cuda-samples/blob/master/Samples/cudaTensorCoreGemm/cudaTensorCoreGemm.cu
|
||||
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#wmma-subbyte
|
||||
// nvcuda::wmma::col_major -> cutlass::MatrixLayout::kColumnMajor (matrix is not transposed)
|
||||
|
||||
// Matrix A Matrix B Accumulator Matrix Size (m-n-k)
|
||||
// precision::b1 precision::b1 int 8x8x128
|
||||
|
||||
// The only dimensions currently supported by WMMA for XNOR
|
||||
// const int WMMA_M = 8;
|
||||
// const int WMMA_N = 8;
|
||||
// const int WMMA_K = 128;
|
||||
|
||||
|
||||
// GOOD
|
||||
void gemm_nn_custom_bin_mean_transposed_gpu(int M, int N, int K,
|
||||
unsigned char *A, int lda,
|
||||
|
Reference in New Issue
Block a user