CheckSIMD should use flags

This commit is contained in:
Alex 2023-03-27 16:32:42 +03:00
parent 7d35a0c315
commit 49c627589c
Signed by untrusted user who does not match committer: enderice2
GPG Key ID: EACC3AD603BAB4DD
3 changed files with 98 additions and 113 deletions

View File

@ -338,7 +338,7 @@ namespace CPU
return Counter;
}
x86SIMDType CheckSIMD()
uint64_t CheckSIMD()
{
#if defined(a32)
return SIMD_NONE; /* TODO: Support x86 SIMD on x32 */
@ -347,7 +347,9 @@ namespace CPU
if (unlikely(!SSEEnabled))
return SIMD_NONE;
static x86SIMDType SIMDType = SIMD_NONE;
// return SIMD_SSE;
static uint64_t SIMDType = SIMD_NONE;
if (likely(SIMDType != SIMD_NONE))
return SIMDType;
@ -365,15 +367,15 @@ namespace CPU
: "a"(0x1));
#endif
if (cpuid1amd.ECX.SSE4_2)
SIMDType = SIMD_SSE42;
SIMDType |= SIMD_SSE42;
else if (cpuid1amd.ECX.SSE4_1)
SIMDType = SIMD_SSE41;
SIMDType |= SIMD_SSE41;
else if (cpuid1amd.ECX.SSE3)
SIMDType = SIMD_SSE3;
SIMDType |= SIMD_SSE3;
else if (cpuid1amd.EDX.SSE2)
SIMDType = SIMD_SSE2;
SIMDType |= SIMD_SSE2;
else if (cpuid1amd.EDX.SSE)
SIMDType = SIMD_SSE;
SIMDType |= SIMD_SSE;
#ifdef DEBUG
if (cpuid1amd.ECX.SSE4_2)
@ -403,15 +405,15 @@ namespace CPU
: "a"(0x1));
#endif
if (cpuid1intel.ECX.SSE4_2)
SIMDType = SIMD_SSE42;
SIMDType |= SIMD_SSE42;
else if (cpuid1intel.ECX.SSE4_1)
SIMDType = SIMD_SSE41;
SIMDType |= SIMD_SSE41;
else if (cpuid1intel.ECX.SSE3)
SIMDType = SIMD_SSE3;
SIMDType |= SIMD_SSE3;
else if (cpuid1intel.EDX.SSE2)
SIMDType = SIMD_SSE2;
SIMDType |= SIMD_SSE2;
else if (cpuid1intel.EDX.SSE)
SIMDType = SIMD_SSE;
SIMDType |= SIMD_SSE;
#ifdef DEBUG
if (cpuid1intel.ECX.SSE4_2)
@ -429,6 +431,7 @@ namespace CPU
return SIMDType;
}
debug("No SIMD support.");
return SIMD_NONE;
}

View File

@ -677,30 +677,21 @@ EXTERNC __no_stack_protector void *__memcpy_chk(void *dest, const void *src, siz
__chk_fail();
void *ret = nullptr;
switch (CPU::CheckSIMD())
{
case CPU::x86SIMDType::SIMD_SSE:
ret = memcpy_sse(dest, src, len);
break;
case CPU::x86SIMDType::SIMD_SSE2:
ret = memcpy_sse2(dest, src, len);
break;
case CPU::x86SIMDType::SIMD_SSE3:
ret = memcpy_sse3(dest, src, len);
break;
case CPU::x86SIMDType::SIMD_SSSE3:
ret = memcpy_ssse3(dest, src, len);
break;
case CPU::x86SIMDType::SIMD_SSE41:
ret = memcpy_sse4_1(dest, src, len);
break;
case CPU::x86SIMDType::SIMD_SSE42:
uint64_t simd = CPU::CheckSIMD();
if (simd & CPU::x86SIMDType::SIMD_SSE42)
ret = memcpy_sse4_2(dest, src, len);
break;
default:
else if (simd & CPU::x86SIMDType::SIMD_SSE41)
ret = memcpy_sse4_1(dest, src, len);
else if (simd & CPU::x86SIMDType::SIMD_SSSE3)
ret = memcpy_ssse3(dest, src, len);
else if (simd & CPU::x86SIMDType::SIMD_SSE3)
ret = memcpy_sse3(dest, src, len);
else if (simd & CPU::x86SIMDType::SIMD_SSE2)
ret = memcpy_sse2(dest, src, len);
else if (simd & CPU::x86SIMDType::SIMD_SSE)
ret = memcpy_sse(dest, src, len);
else
ret = memcpy_unsafe(dest, src, len);
break;
}
#ifdef DEBUG
if (EnableExternalMemoryTracer)
{
@ -751,30 +742,21 @@ EXTERNC __no_stack_protector void *__memset_chk(void *dest, int val, size_t len,
__chk_fail();
void *ret = nullptr;
switch (CPU::CheckSIMD())
{
case CPU::x86SIMDType::SIMD_SSE:
ret = memset_sse(dest, val, len);
break;
case CPU::x86SIMDType::SIMD_SSE2:
ret = memset_sse2(dest, val, len);
break;
case CPU::x86SIMDType::SIMD_SSE3:
ret = memset_sse3(dest, val, len);
break;
case CPU::x86SIMDType::SIMD_SSSE3:
ret = memset_ssse3(dest, val, len);
break;
case CPU::x86SIMDType::SIMD_SSE41:
ret = memset_sse4_1(dest, val, len);
break;
case CPU::x86SIMDType::SIMD_SSE42:
uint64_t simd = CPU::CheckSIMD();
if (simd & CPU::x86SIMDType::SIMD_SSE42)
ret = memset_sse4_2(dest, val, len);
break;
default:
else if (simd & CPU::x86SIMDType::SIMD_SSE41)
ret = memset_sse4_1(dest, val, len);
else if (simd & CPU::x86SIMDType::SIMD_SSSE3)
ret = memset_ssse3(dest, val, len);
else if (simd & CPU::x86SIMDType::SIMD_SSE3)
ret = memset_sse3(dest, val, len);
else if (simd & CPU::x86SIMDType::SIMD_SSE2)
ret = memset_sse2(dest, val, len);
else if (simd & CPU::x86SIMDType::SIMD_SSE)
ret = memset_sse(dest, val, len);
else
ret = memset_unsafe(dest, val, len);
break;
}
#ifdef DEBUG
if (EnableExternalMemoryTracer)
{
@ -831,30 +813,21 @@ EXTERNC __no_stack_protector void *__memmove_chk(void *dest, const void *src, si
__chk_fail();
void *ret = nullptr;
switch (CPU::CheckSIMD())
{
case CPU::x86SIMDType::SIMD_SSE:
ret = memmove_sse(dest, src, len);
break;
case CPU::x86SIMDType::SIMD_SSE2:
ret = memmove_sse2(dest, src, len);
break;
case CPU::x86SIMDType::SIMD_SSE3:
ret = memmove_sse3(dest, src, len);
break;
case CPU::x86SIMDType::SIMD_SSSE3:
ret = memmove_ssse3(dest, src, len);
break;
case CPU::x86SIMDType::SIMD_SSE41:
ret = memmove_sse4_1(dest, src, len);
break;
case CPU::x86SIMDType::SIMD_SSE42:
uint64_t simd = CPU::CheckSIMD();
if (simd & CPU::x86SIMDType::SIMD_SSE42)
ret = memmove_sse4_2(dest, src, len);
break;
default:
else if (simd & CPU::x86SIMDType::SIMD_SSE41)
ret = memmove_sse4_1(dest, src, len);
else if (simd & CPU::x86SIMDType::SIMD_SSSE3)
ret = memmove_ssse3(dest, src, len);
else if (simd & CPU::x86SIMDType::SIMD_SSE3)
ret = memmove_sse3(dest, src, len);
else if (simd & CPU::x86SIMDType::SIMD_SSE2)
ret = memmove_sse2(dest, src, len);
else if (simd & CPU::x86SIMDType::SIMD_SSE)
ret = memmove_sse(dest, src, len);
else
ret = memmove_unsafe(dest, src, len);
break;
}
#ifdef DEBUG
if (EnableExternalMemoryTracer)
{

View File

@ -43,36 +43,45 @@ namespace CPU
enum x86SIMDType
{
SIMD_NONE,
SIMD_SSE,
SIMD_SSE2,
SIMD_SSE3,
SIMD_SSSE3,
SIMD_SSE41,
SIMD_SSE42,
SIMD_AVX,
SIMD_AVX2,
SIMD_AVX512F,
SIMD_AVX512BW,
SIMD_AVX512CD,
SIMD_AVX512DQ,
SIMD_AVX512ER,
SIMD_AVX512IFMA,
SIMD_AVX512PF,
SIMD_AVX512VBMI,
SIMD_AVX512VL,
SIMD_AVX512VNNI,
SIMD_AVX512BITALG,
SIMD_AVX512VPOPCNTDQ,
SIMD_AVX512_4VNNIW,
SIMD_AVX512_4FMAPS,
SIMD_AVX512_VP2INTERSECT,
SIMD_AVX512_BF16,
SIMD_AVX512_VBMI2,
SIMD_AVX512_GFNI,
SIMD_AVX512_VAES,
SIMD_AVX512_VPCLMULQDQ,
SIMD_AVX512_VNNI,
SIMD_NONE = (1 << 0),
SIMD_SSE = (1 << 1),
SIMD_SSE2 = (1 << 2),
SIMD_SSE3 = (1 << 3),
SIMD_SSSE3 = (1 << 4),
SIMD_SSE41 = (1 << 5),
SIMD_SSE42 = (1 << 6),
SIMD_AVX = (1 << 7),
SIMD_AVX2 = (1 << 8),
SIMD_AVX512 = (1 << 9),
SIMD_AVX512F = (1 << 10),
SIMD_AVX512CD = (1 << 11),
SIMD_AVX512ER = (1 << 12),
SIMD_AVX512PF = (1 << 13),
SIMD_AVX512VL = (1 << 14),
SIMD_AVX512DQ = (1 << 16),
SIMD_AVX512BW = (1 << 15),
SIMD_AVX512IFMA = (1 << 17),
SIMD_AVX512VBMI = (1 << 18),
SIMD_AVX5124VNNIW = (1 << 19),
SIMD_AVX5124FMAPS = (1 << 20),
SIMD_AVX512VPOPCNTDQ = (1 << 21),
SIMD_AVX512VNNI = (1 << 22),
SIMD_AVX512VBMI2 = (1 << 23),
SIMD_AVX512BITALG = (1 << 24),
SIMD_AVX512VP2INTERSECT = (1 << 25),
SIMD_AVX512GFNI = (1 << 26),
SIMD_AVX512VPCLMULQDQ = (1 << 27),
SIMD_AVX512VAES = (1 << 28),
};
/**
@ -99,9 +108,9 @@ namespace CPU
/**
* @brief Check SIMD support. The result is a set of x86SIMDType flags describing the highest supported SIMD type.
*
* @return x86SIMDType
* @return x86SIMDType flags.
*/
x86SIMDType CheckSIMD();
uint64_t CheckSIMD();
/**
* @brief Check SIMD support.