diff --git a/Core/CPU.cpp b/Core/CPU.cpp index 4ff4d01..cfcd8ab 100644 --- a/Core/CPU.cpp +++ b/Core/CPU.cpp @@ -338,7 +338,7 @@ namespace CPU return Counter; } - x86SIMDType CheckSIMD() + uint64_t CheckSIMD() { #if defined(a32) return SIMD_NONE; /* TODO: Support x86 SIMD on x32 */ @@ -347,7 +347,9 @@ namespace CPU if (unlikely(!SSEEnabled)) return SIMD_NONE; - static x86SIMDType SIMDType = SIMD_NONE; + // return SIMD_SSE; + + static uint64_t SIMDType = SIMD_NONE; if (likely(SIMDType != SIMD_NONE)) return SIMDType; @@ -365,15 +367,15 @@ namespace CPU : "a"(0x1)); #endif if (cpuid1amd.ECX.SSE4_2) - SIMDType = SIMD_SSE42; + SIMDType |= SIMD_SSE42; else if (cpuid1amd.ECX.SSE4_1) - SIMDType = SIMD_SSE41; + SIMDType |= SIMD_SSE41; else if (cpuid1amd.ECX.SSE3) - SIMDType = SIMD_SSE3; + SIMDType |= SIMD_SSE3; else if (cpuid1amd.EDX.SSE2) - SIMDType = SIMD_SSE2; + SIMDType |= SIMD_SSE2; else if (cpuid1amd.EDX.SSE) - SIMDType = SIMD_SSE; + SIMDType |= SIMD_SSE; #ifdef DEBUG if (cpuid1amd.ECX.SSE4_2) @@ -403,15 +405,15 @@ namespace CPU : "a"(0x1)); #endif if (cpuid1intel.ECX.SSE4_2) - SIMDType = SIMD_SSE42; + SIMDType |= SIMD_SSE42; else if (cpuid1intel.ECX.SSE4_1) - SIMDType = SIMD_SSE41; + SIMDType |= SIMD_SSE41; else if (cpuid1intel.ECX.SSE3) - SIMDType = SIMD_SSE3; + SIMDType |= SIMD_SSE3; else if (cpuid1intel.EDX.SSE2) - SIMDType = SIMD_SSE2; + SIMDType |= SIMD_SSE2; else if (cpuid1intel.EDX.SSE) - SIMDType = SIMD_SSE; + SIMDType |= SIMD_SSE; #ifdef DEBUG if (cpuid1intel.ECX.SSE4_2) @@ -429,6 +431,7 @@ namespace CPU return SIMDType; } + debug("No SIMD support."); return SIMD_NONE; } diff --git a/Library/Convert.cpp b/Library/Convert.cpp index a994d2d..2c830cb 100644 --- a/Library/Convert.cpp +++ b/Library/Convert.cpp @@ -602,13 +602,13 @@ EXTERNC int log2(unsigned int n) } int log = 0; - + if ((n & 0xFFFF0000) != 0) { n >>= 16; log = 16; } - + if (n >= 256) { n >>= 8; @@ -626,7 +626,7 @@ EXTERNC int log2(unsigned int n) n >>= 2; log += 2; } - + return log + (n >> 1); } @@ -677,30 +677,21 @@ EXTERNC __no_stack_protector void *__memcpy_chk(void *dest, const void *src, siz __chk_fail(); void *ret = nullptr; - switch (CPU::CheckSIMD()) - { - case CPU::x86SIMDType::SIMD_SSE: - ret = memcpy_sse(dest, src, len); - break; - case CPU::x86SIMDType::SIMD_SSE2: - ret = memcpy_sse2(dest, src, len); - break; - case CPU::x86SIMDType::SIMD_SSE3: - ret = memcpy_sse3(dest, src, len); - break; - case CPU::x86SIMDType::SIMD_SSSE3: - ret = memcpy_ssse3(dest, src, len); - break; - case CPU::x86SIMDType::SIMD_SSE41: - ret = memcpy_sse4_1(dest, src, len); - break; - case CPU::x86SIMDType::SIMD_SSE42: + uint64_t simd = CPU::CheckSIMD(); + if (simd & CPU::x86SIMDType::SIMD_SSE42) ret = memcpy_sse4_2(dest, src, len); - break; - default: + else if (simd & CPU::x86SIMDType::SIMD_SSE41) + ret = memcpy_sse4_1(dest, src, len); + else if (simd & CPU::x86SIMDType::SIMD_SSSE3) + ret = memcpy_ssse3(dest, src, len); + else if (simd & CPU::x86SIMDType::SIMD_SSE3) + ret = memcpy_sse3(dest, src, len); + else if (simd & CPU::x86SIMDType::SIMD_SSE2) + ret = memcpy_sse2(dest, src, len); + else if (simd & CPU::x86SIMDType::SIMD_SSE) + ret = memcpy_sse(dest, src, len); + else ret = memcpy_unsafe(dest, src, len); - break; - } #ifdef DEBUG if (EnableExternalMemoryTracer) { @@ -751,30 +742,21 @@ EXTERNC __no_stack_protector void *__memset_chk(void *dest, int val, size_t len, __chk_fail(); void *ret = nullptr; - switch (CPU::CheckSIMD()) - { - case CPU::x86SIMDType::SIMD_SSE: - ret = memset_sse(dest, val, len); - break; - case CPU::x86SIMDType::SIMD_SSE2: - ret = memset_sse2(dest, val, len); - break; - case CPU::x86SIMDType::SIMD_SSE3: - ret = memset_sse3(dest, val, len); - break; - case CPU::x86SIMDType::SIMD_SSSE3: - ret = memset_ssse3(dest, val, len); - break; - case CPU::x86SIMDType::SIMD_SSE41: - ret = memset_sse4_1(dest, val, len); - break; - case CPU::x86SIMDType::SIMD_SSE42: + uint64_t simd = CPU::CheckSIMD(); + if (simd & CPU::x86SIMDType::SIMD_SSE42) ret = memset_sse4_2(dest, val, len); - break; - default: + else if (simd & CPU::x86SIMDType::SIMD_SSE41) + ret = memset_sse4_1(dest, val, len); + else if (simd & CPU::x86SIMDType::SIMD_SSSE3) + ret = memset_ssse3(dest, val, len); + else if (simd & CPU::x86SIMDType::SIMD_SSE3) + ret = memset_sse3(dest, val, len); + else if (simd & CPU::x86SIMDType::SIMD_SSE2) + ret = memset_sse2(dest, val, len); + else if (simd & CPU::x86SIMDType::SIMD_SSE) + ret = memset_sse(dest, val, len); + else ret = memset_unsafe(dest, val, len); - break; - } #ifdef DEBUG if (EnableExternalMemoryTracer) { @@ -831,30 +813,21 @@ EXTERNC __no_stack_protector void *__memmove_chk(void *dest, const void *src, si __chk_fail(); void *ret = nullptr; - switch (CPU::CheckSIMD()) - { - case CPU::x86SIMDType::SIMD_SSE: - ret = memmove_sse(dest, src, len); - break; - case CPU::x86SIMDType::SIMD_SSE2: - ret = memmove_sse2(dest, src, len); - break; - case CPU::x86SIMDType::SIMD_SSE3: - ret = memmove_sse3(dest, src, len); - break; - case CPU::x86SIMDType::SIMD_SSSE3: - ret = memmove_ssse3(dest, src, len); - break; - case CPU::x86SIMDType::SIMD_SSE41: - ret = memmove_sse4_1(dest, src, len); - break; - case CPU::x86SIMDType::SIMD_SSE42: + uint64_t simd = CPU::CheckSIMD(); + if (simd & CPU::x86SIMDType::SIMD_SSE42) ret = memmove_sse4_2(dest, src, len); - break; - default: + else if (simd & CPU::x86SIMDType::SIMD_SSE41) + ret = memmove_sse4_1(dest, src, len); + else if (simd & CPU::x86SIMDType::SIMD_SSSE3) + ret = memmove_ssse3(dest, src, len); + else if (simd & CPU::x86SIMDType::SIMD_SSE3) + ret = memmove_sse3(dest, src, len); + else if (simd & CPU::x86SIMDType::SIMD_SSE2) + ret = memmove_sse2(dest, src, len); + else if (simd & CPU::x86SIMDType::SIMD_SSE) + ret = memmove_sse(dest, src, len); + else ret = memmove_unsafe(dest, src, len); - break; - } #ifdef DEBUG if (EnableExternalMemoryTracer) { diff --git a/include/cpu.hpp b/include/cpu.hpp index eaf7dfb..1285a58 100644 --- a/include/cpu.hpp +++ b/include/cpu.hpp @@ -43,36 +43,45 @@ namespace CPU enum x86SIMDType { - SIMD_NONE, - SIMD_SSE, - SIMD_SSE2, - SIMD_SSE3, - SIMD_SSSE3, - SIMD_SSE41, - SIMD_SSE42, - SIMD_AVX, - SIMD_AVX2, - SIMD_AVX512F, - SIMD_AVX512BW, - SIMD_AVX512CD, - SIMD_AVX512DQ, - SIMD_AVX512ER, - SIMD_AVX512IFMA, - SIMD_AVX512PF, - SIMD_AVX512VBMI, - SIMD_AVX512VL, - SIMD_AVX512VNNI, - SIMD_AVX512BITALG, - SIMD_AVX512VPOPCNTDQ, - SIMD_AVX512_4VNNIW, - SIMD_AVX512_4FMAPS, - SIMD_AVX512_VP2INTERSECT, - SIMD_AVX512_BF16, - SIMD_AVX512_VBMI2, - SIMD_AVX512_GFNI, - SIMD_AVX512_VAES, - SIMD_AVX512_VPCLMULQDQ, - SIMD_AVX512_VNNI, + SIMD_NONE = (1 << 0), + + SIMD_SSE = (1 << 1), + SIMD_SSE2 = (1 << 2), + SIMD_SSE3 = (1 << 3), + SIMD_SSSE3 = (1 << 4), + SIMD_SSE41 = (1 << 5), + SIMD_SSE42 = (1 << 6), + + SIMD_AVX = (1 << 7), + SIMD_AVX2 = (1 << 8), + SIMD_AVX512 = (1 << 9), + + SIMD_AVX512F = (1 << 10), + SIMD_AVX512CD = (1 << 11), + SIMD_AVX512ER = (1 << 12), + SIMD_AVX512PF = (1 << 13), + + SIMD_AVX512VL = (1 << 14), + SIMD_AVX512DQ = (1 << 16), + SIMD_AVX512BW = (1 << 15), + + SIMD_AVX512IFMA = (1 << 17), + SIMD_AVX512VBMI = (1 << 18), + + SIMD_AVX5124VNNIW = (1 << 19), + SIMD_AVX5124FMAPS = (1 << 20), + + SIMD_AVX512VPOPCNTDQ = (1 << 21), + + SIMD_AVX512VNNI = (1 << 22), + SIMD_AVX512VBMI2 = (1 << 23), + SIMD_AVX512BITALG = (1 << 24), + + SIMD_AVX512VP2INTERSECT = (1 << 25), + + SIMD_AVX512GFNI = (1 << 26), + SIMD_AVX512VPCLMULQDQ = (1 << 27), + SIMD_AVX512VAES = (1 << 28), }; /** @@ -99,9 +108,9 @@ namespace CPU /** * @brief Check SIMD support. It will return the highest supported SIMD type. * - * @return x86SIMDType + * @return x86SIMDType flags. */ - x86SIMDType CheckSIMD(); + uint64_t CheckSIMD(); /** * @brief Check SIMD support.