mirror of
https://github.com/pjreddie/darknet.git
synced 2023-08-10 21:13:14 +03:00
Fixed bug for 32-bit compilation without GPU.
This commit is contained in:
@ -78,9 +78,15 @@
|
|||||||
<WarningLevel>Level3</WarningLevel>
|
<WarningLevel>Level3</WarningLevel>
|
||||||
<Optimization>Disabled</Optimization>
|
<Optimization>Disabled</Optimization>
|
||||||
<SDLCheck>true</SDLCheck>
|
<SDLCheck>true</SDLCheck>
|
||||||
|
<AdditionalIncludeDirectories>C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);</AdditionalIncludeDirectories>
|
||||||
|
<PreprocessorDefinitions>_CRTDBG_MAP_ALLOC;_MBCS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<UndefinePreprocessorDefinitions>OPENCV;</UndefinePreprocessorDefinitions>
|
||||||
|
<ForcedIncludeFiles>stdlib.h;crtdbg.h;%(ForcedIncludeFiles)</ForcedIncludeFiles>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<Link>
|
<Link>
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
|
<AdditionalLibraryDirectories>C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||||
|
<AdditionalDependencies>..\..\3rdparty\lib\x86\pthreadVC2.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||||
</Link>
|
</Link>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
@ -89,9 +95,10 @@
|
|||||||
<Optimization>Disabled</Optimization>
|
<Optimization>Disabled</Optimization>
|
||||||
<SDLCheck>true</SDLCheck>
|
<SDLCheck>true</SDLCheck>
|
||||||
<AdditionalIncludeDirectories>C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>_MBCS;OPENCV;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>_MBCS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<UndefinePreprocessorDefinitions>CUDNN</UndefinePreprocessorDefinitions>
|
<UndefinePreprocessorDefinitions>OPENCV;CUDNN</UndefinePreprocessorDefinitions>
|
||||||
<OpenMPSupport>true</OpenMPSupport>
|
<OpenMPSupport>true</OpenMPSupport>
|
||||||
|
<ForcedIncludeFiles>stdlib.h;crtdbg.h;%(ForcedIncludeFiles)</ForcedIncludeFiles>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<Link>
|
<Link>
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
@ -111,15 +118,17 @@
|
|||||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
<SDLCheck>true</SDLCheck>
|
<SDLCheck>true</SDLCheck>
|
||||||
<AdditionalIncludeDirectories>C:\opencv_2.4.9\opencv\build\include;..\..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>OPENCV;_CRT_SECURE_NO_WARNINGS;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
|
<UndefinePreprocessorDefinitions>
|
||||||
|
</UndefinePreprocessorDefinitions>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<Link>
|
<Link>
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||||
<OptimizeReferences>true</OptimizeReferences>
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
<AdditionalLibraryDirectories>C:\opencv_2.4.9\opencv\build\x64\vc12\lib;$(CUDA_PATH)lib\$(PlatformName);$(cudnn)\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
<AdditionalLibraryDirectories>C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||||
<AdditionalDependencies>..\..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;cudnn.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
<AdditionalDependencies>..\..\3rdparty\lib\x86\pthreadVC2.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||||
</Link>
|
</Link>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
|
@ -615,7 +615,7 @@ void binary_align_weights(convolutional_layer *l)
|
|||||||
binarize_weights(l->weights, m, k, l->binary_weights);
|
binarize_weights(l->weights, m, k, l->binary_weights);
|
||||||
|
|
||||||
size_t align_weights_size = new_lda * m;
|
size_t align_weights_size = new_lda * m;
|
||||||
l->align_bit_weights_size = align_weights_size / 8;// +1;
|
l->align_bit_weights_size = align_weights_size / 8 + 1;
|
||||||
float *align_weights = calloc(align_weights_size, sizeof(float));
|
float *align_weights = calloc(align_weights_size, sizeof(float));
|
||||||
l->align_bit_weights = calloc(l->align_bit_weights_size, sizeof(char));
|
l->align_bit_weights = calloc(l->align_bit_weights_size, sizeof(char));
|
||||||
|
|
||||||
|
25
src/gemm.c
25
src/gemm.c
@ -1719,6 +1719,25 @@ void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Count the number of set bits in a 64-bit value.
 *
 * On 64-bit targets a single 64-bit popcount intrinsic/builtin is used.
 * On 32-bit targets the value is split into its low and high 32-bit words
 * and the two 32-bit popcounts are summed.
 *
 * val64: the value whose set bits are counted.
 * Returns the number of 1 bits in val64 (0..64).
 */
static inline int popcnt_64(uint64_t val64) {
#ifdef WIN32  // Windows
#ifdef _WIN64 // Windows 64-bit
    int tmp_count = (int)__popcnt64(val64);
#else         // Windows 32-bit
    // __popcnt takes a 32-bit operand: count the low word, then the high word.
    int tmp_count = (int)__popcnt((unsigned int)val64);
    tmp_count += (int)__popcnt((unsigned int)(val64 >> 32));
#endif
#else         // Linux
#ifdef __x86_64__  // Linux 64-bit
    int tmp_count = __builtin_popcountll(val64);
#else         // Linux 32-bit
    // __builtin_popcount takes an unsigned int, so the argument is truncated
    // to the low 32 bits. The high word must be shifted down before counting,
    // otherwise the low word is counted twice and the high word is ignored.
    int tmp_count = __builtin_popcount((unsigned int)val64);
    tmp_count += __builtin_popcount((unsigned int)(val64 >> 32));
#endif
#endif
    return tmp_count;
}
|
||||||
|
|
||||||
void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED,
|
void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED,
|
||||||
unsigned char *A, int lda,
|
unsigned char *A, int lda,
|
||||||
unsigned char *B, int ldb,
|
unsigned char *B, int ldb,
|
||||||
@ -1739,11 +1758,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED,
|
|||||||
uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8));
|
uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8));
|
||||||
uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64);
|
uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64);
|
||||||
|
|
||||||
#ifdef WIN32
|
int tmp_count = popcnt_64(c_bit64);
|
||||||
int tmp_count = __popcnt64(c_bit64);
|
|
||||||
#else
|
|
||||||
int tmp_count = __builtin_popcountll(c_bit64);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (K - k < 64) tmp_count = tmp_count - (64 - (K - k)); // remove extra bits
|
if (K - k < 64) tmp_count = tmp_count - (64 - (K - k)); // remove extra bits
|
||||||
count += tmp_count;
|
count += tmp_count;
|
||||||
|
10
src/layer.c
10
src/layer.c
@ -33,8 +33,8 @@ void free_layer(layer l)
|
|||||||
if (l.scale_updates) free(l.scale_updates);
|
if (l.scale_updates) free(l.scale_updates);
|
||||||
if (l.weights) free(l.weights);
|
if (l.weights) free(l.weights);
|
||||||
if (l.weight_updates) free(l.weight_updates);
|
if (l.weight_updates) free(l.weight_updates);
|
||||||
if (l.weights) free(l.align_bit_weights);
|
if (l.align_bit_weights) free(l.align_bit_weights);
|
||||||
if (l.weights) free(l.mean_arr);
|
if (l.mean_arr) free(l.mean_arr);
|
||||||
if (l.delta) free(l.delta);
|
if (l.delta) free(l.delta);
|
||||||
if (l.output) free(l.output);
|
if (l.output) free(l.output);
|
||||||
if (l.squared) free(l.squared);
|
if (l.squared) free(l.squared);
|
||||||
@ -84,6 +84,12 @@ void free_layer(layer l)
|
|||||||
if (l.mean_delta_gpu) cuda_free(l.mean_delta_gpu);
|
if (l.mean_delta_gpu) cuda_free(l.mean_delta_gpu);
|
||||||
if (l.x_gpu) cuda_free(l.x_gpu);
|
if (l.x_gpu) cuda_free(l.x_gpu);
|
||||||
if (l.x_norm_gpu) cuda_free(l.x_norm_gpu);
|
if (l.x_norm_gpu) cuda_free(l.x_norm_gpu);
|
||||||
|
|
||||||
|
if (l.align_bit_weights_gpu) cuda_free(l.align_bit_weights_gpu);
|
||||||
|
if (l.mean_arr_gpu) cuda_free(l.mean_arr_gpu);
|
||||||
|
if (l.align_workspace_gpu) cuda_free(l.align_workspace_gpu);
|
||||||
|
if (l.transposed_align_workspace_gpu) cuda_free(l.transposed_align_workspace_gpu);
|
||||||
|
|
||||||
if (l.weights_gpu) cuda_free(l.weights_gpu);
|
if (l.weights_gpu) cuda_free(l.weights_gpu);
|
||||||
if (l.weight_updates_gpu) cuda_free(l.weight_updates_gpu);
|
if (l.weight_updates_gpu) cuda_free(l.weight_updates_gpu);
|
||||||
if (l.weights_gpu16) cuda_free(l.weights_gpu16);
|
if (l.weights_gpu16) cuda_free(l.weights_gpu16);
|
||||||
|
@ -374,17 +374,19 @@ int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh,
|
|||||||
for(n = 0; n < l.n; ++n){
|
for(n = 0; n < l.n; ++n){
|
||||||
int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4);
|
int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4);
|
||||||
float objectness = predictions[obj_index];
|
float objectness = predictions[obj_index];
|
||||||
if(objectness <= thresh) continue;
|
//if(objectness <= thresh) continue;
|
||||||
int box_index = entry_index(l, 0, n*l.w*l.h + i, 0);
|
if (objectness > thresh) {
|
||||||
dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h);
|
int box_index = entry_index(l, 0, n*l.w*l.h + i, 0);
|
||||||
dets[count].objectness = objectness;
|
dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h);
|
||||||
dets[count].classes = l.classes;
|
dets[count].objectness = objectness;
|
||||||
for(j = 0; j < l.classes; ++j){
|
dets[count].classes = l.classes;
|
||||||
int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j);
|
for (j = 0; j < l.classes; ++j) {
|
||||||
float prob = objectness*predictions[class_index];
|
int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j);
|
||||||
dets[count].prob[j] = (prob > thresh) ? prob : 0;
|
float prob = objectness*predictions[class_index];
|
||||||
|
dets[count].prob[j] = (prob > thresh) ? prob : 0;
|
||||||
|
}
|
||||||
|
++count;
|
||||||
}
|
}
|
||||||
++count;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
correct_yolo_boxes(dets, count, w, h, netw, neth, relative, letter);
|
correct_yolo_boxes(dets, count, w, h, netw, neth, relative, letter);
|
||||||
|
Reference in New Issue
Block a user