From 76cf0b205f193f220d35d086f04b42304c6920c6 Mon Sep 17 00:00:00 2001
From: Alex
Date: Thu, 16 Mar 2023 21:41:11 +0200
Subject: [PATCH] Stub intrinsics header

---
 include/intrin.hpp | 193 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 193 insertions(+)
 create mode 100644 include/intrin.hpp

diff --git a/include/intrin.hpp b/include/intrin.hpp
new file mode 100644
index 0000000..6a9b941
--- /dev/null
+++ b/include/intrin.hpp
@@ -0,0 +1,193 @@
#ifndef __FENNIX_KERNEL_SIMD_H__
#define __FENNIX_KERNEL_SIMD_H__

#include
#include

#define FXSR_FN_ATTR __always_inline __target("fxsr")
#define MMX_FN_ATTR __always_inline __target("mmx")
#define SSE_FN_ATTR __always_inline __target("sse")
#define SSE2_FN_ATTR __always_inline __target("sse2")
#define SSE3_FN_ATTR __always_inline __target("sse3")
#define SSSE3_FN_ATTR __always_inline __target("ssse3")
#define SSE4_1_FN_ATTR __always_inline __target("sse4.1")
#define SSE4_2_FN_ATTR __always_inline __target("sse4.2")
#define AVX_FN_ATTR __always_inline __target("avx")
#define AVX2_FN_ATTR __always_inline __target("avx2")
#define ST_IN static inline

namespace FXSR
{
    /* FXSAVE/FXRSTOR save and restore the x87, MMX and SSE state
       to/from a 512-byte, 16-byte-aligned memory area. Defined
       ST_IN so the header can be included from multiple
       translation units without multiple-definition errors. */
    ST_IN FXSR_FN_ATTR void _fxsave(void *mem_addr)
    {
        __builtin_ia32_fxsave(mem_addr);
    }

    ST_IN FXSR_FN_ATTR void _fxrstor(void *mem_addr)
    {
        __builtin_ia32_fxrstor(mem_addr);
    }

    ST_IN FXSR_FN_ATTR void _fxsave64(void *mem_addr)
    {
        asmv("fxsaveq (%0)"
             :
             : "r"(mem_addr)
             : "memory");
    }

    ST_IN FXSR_FN_ATTR void _fxrstor64(void *mem_addr)
    {
        asmv("fxrstorq (%0)"
             :
             : "r"(mem_addr)
             : "memory");
    }
}

namespace SMAP
{
    /* CLAC/STAC toggle EFLAGS.AC, disabling/enabling supervisor
       access to user-mode pages while SMAP is active. */
    ST_IN void _clac(void)
    {
        asmv("clac" ::: "cc");
    }

    ST_IN void _stac(void)
    {
        asmv("stac" ::: "cc");
    }
}

namespace MMX
{
    typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)));

    typedef long long __v1di __attribute__((__vector_size__(8)));
    typedef int __v2si __attribute__((__vector_size__(8)));
    typedef short __v4hi __attribute__((__vector_size__(8)));
    typedef char __v8qi __attribute__((__vector_size__(8)));

    /* EMMS clears the x87 tag word; call it after MMX code and
       before any x87 floating-point instructions run. */
    ST_IN MMX_FN_ATTR void _mm_empty(void)
    {
        __builtin_ia32_emms();
    }
}

namespace SSE
{
    typedef int __v4si __attribute__((__vector_size__(16)));
    typedef unsigned int __v4su __attribute__((__vector_size__(16)));
    typedef float __v4sf __attribute__((__vector_size__(16)));

    typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
    typedef float __m128_u __attribute__((__vector_size__(16), __aligned__(1)));

    ST_IN SSE_FN_ATTR __m128 _mm_add_ss(__m128 a, __m128 b)
    {
        // return __builtin_ia32_addss(a, b);
        a[0] += b[0]; /* scalar add: only the lowest lane changes */
        return a;
    }

    ST_IN SSE_FN_ATTR __m128 _mm_add_ps(__m128 a, __m128 b)
    {
        return (__m128)((__v4sf)a + (__v4sf)b);
    }
}
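/* Usage sketch (illustrative only, not part of the stubbed API):
   saving and restoring the FPU/MMX/SSE state, e.g. around a context
   switch. `fpu_area` is a hypothetical buffer owned by the caller;
   FXSAVE requires it to be 512 bytes and 16-byte aligned.

       alignas(16) static unsigned char fpu_area[512];

       FXSR::_fxsave(fpu_area);  // snapshot x87/MMX/SSE state
       // ...run other tasks...
       FXSR::_fxrstor(fpu_area); // restore the snapshot
*/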
namespace SSE2
{
    typedef double __v2df __attribute__((__vector_size__(16)));

    typedef long long __v2di __attribute__((__vector_size__(16)));
    typedef int __v4si __attribute__((__vector_size__(16)));
    typedef short __v8hi __attribute__((__vector_size__(16)));
    typedef char __v16qi __attribute__((__vector_size__(16)));
    typedef signed char __v16qs __attribute__((__vector_size__(16)));

    typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
    typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
    typedef unsigned char __v16qu __attribute__((__vector_size__(16)));

    typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
    typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1)));
    typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
    typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1)));

    ST_IN SSE2_FN_ATTR __m128i _mm_mul_epu32(__m128i a, __m128i b)
    {
        /* PMULUDQ multiplies the unsigned low dword of each 64-bit
           lane. Start from `a` and use a read-write ("+x")
           constraint so the compiler knows the register is both
           consumed and produced; the previous version silently
           clobbered an input operand, which the constraints did
           not permit. */
        __m128i result = a;
        __asm__("pmuludq %1, %0"
                : "+x"(result)
                : "x"(b));
        return result;
    }

    ST_IN SSE2_FN_ATTR __m128i _mm_set_epi32(int e3, int e2, int e1, int e0)
    {
        /* PINSRD is an SSE4.1 instruction, so it cannot back an
           SSE2 intrinsic; a vector literal lets the compiler pick
           a valid SSE2 sequence. e0 is the lowest lane. */
        return (__m128i)(__v4si){e0, e1, e2, e3};
    }

    ST_IN SSE2_FN_ATTR __m128i _mm_set1_epi32(int a)
    {
        /* Broadcast `a` to all four lanes; this also drops the
           hard-coded %xmm0 scratch register the inline-asm
           version relied on. */
        return _mm_set_epi32(a, a, a, a);
    }

    ST_IN SSE2_FN_ATTR void _mm_storeu_si128(__m128i_u *mem_addr, __m128i a)
    {
        /* MOVDQU performs the store, so mem_addr may be unaligned;
           the __m128i_u parameter type tells the compiler not to
           assume 16-byte alignment. */
        asmv("movdqu %1, %0"
             : "=m"(*mem_addr)
             : "x"(a));
    }
}

namespace SSE3
{

}

namespace SSSE3
{

}

namespace SSE4_1
{
    typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));

    /* Not implemented yet; declared so callers can already be
       written against the final interface. */
    ST_IN SSE4_1_FN_ATTR __m128i _mm_cvtepu8_epi32(__m128i a);
    ST_IN SSE4_1_FN_ATTR __m128i _mm_mullo_epi32(__m128i a, __m128i b);
    ST_IN SSE4_1_FN_ATTR __m128i _mm_srli_epi32(__m128i a, int imm8);
    ST_IN SSE4_1_FN_ATTR int _mm_cvtsi128_si32(__m128i a);
}

namespace SSE4_2
{

}

namespace AVX
{

}

namespace AVX2
{

}

#endif // !__FENNIX_KERNEL_SIMD_H__
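Usage sketch (not part of the patch): with the header above, a caller
built for a target where SSE2 code is permitted can combine the stubs
as below. `FillPattern` and `dst` are hypothetical names; `dst` is a
possibly unaligned, at least 16-byte destination buffer.

    #include "intrin.hpp"

    void FillPattern(void *dst)
    {
        /* Broadcast a 32-bit pattern and store it unaligned. */
        SSE2::__m128i v = SSE2::_mm_set1_epi32(0x55AA55AA);
        SSE2::_mm_storeu_si128((SSE2::__m128i_u *)dst, v);
    }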