From f91503d7043400b2c396997110ffe4ab83322218 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 4 Jan 2023 06:45:22 +0200 Subject: [PATCH] TODO: implement a faster mem* --- Library/MemoryCopySIMD.cpp | 5 ++ Library/MemoryMoveSIMD.cpp | 23 ++++---- Library/MemorySetSIMD.cpp | 118 ++++--------------------------------- 3 files changed, 28 insertions(+), 118 deletions(-) diff --git a/Library/MemoryCopySIMD.cpp b/Library/MemoryCopySIMD.cpp index e8ae103..8d98e38 100644 --- a/Library/MemoryCopySIMD.cpp +++ b/Library/MemoryCopySIMD.cpp @@ -5,6 +5,11 @@ #include #include +/* +TODO: Replace these functions with even more optimized versions. + The current versions are fast but not as fast as they could be and also we need implementation for avx, not only sse. +*/ + EXTERNC void *memcpy_sse(void *dest, const void *src, size_t n) { char *d = (char *)dest; diff --git a/Library/MemoryMoveSIMD.cpp b/Library/MemoryMoveSIMD.cpp index f51bb3a..6bd3d4a 100644 --- a/Library/MemoryMoveSIMD.cpp +++ b/Library/MemoryMoveSIMD.cpp @@ -5,40 +5,39 @@ #include #include +/* +TODO: Replace these functions with even more optimized versions. + The current versions are fast but not as fast as they could be and also we need implementation for avx, not only sse. +*/ + // TODO: Implement these functions EXTERNC void *memmove_sse(void *dest, const void *src, size_t n) { - memmove_unsafe(dest, src, n); - return dest; + return memmove_unsafe(dest, src, n); } EXTERNC void *memmove_sse2(void *dest, const void *src, size_t n) { - memmove_unsafe(dest, src, n); - return dest; + return memmove_unsafe(dest, src, n); } EXTERNC void *memmove_sse3(void *dest, const void *src, size_t n) { - memmove_unsafe(dest, src, n); - return dest; + return memmove_unsafe(dest, src, n); } EXTERNC void *memmove_ssse3(void *dest, const void *src, size_t n) { - memmove_unsafe(dest, src, n); - return dest; + return memmove_unsafe(dest, src, n); } EXTERNC void *memmove_sse4_1(void *dest, const void *src, size_t n) { - memmove_unsafe(dest, src, n); - return dest; + return memmove_unsafe(dest, src, n); } EXTERNC void *memmove_sse4_2(void *dest, const void *src, size_t n) { - memmove_unsafe(dest, src, n); - return dest; + return memmove_unsafe(dest, src, n); } diff --git a/Library/MemorySetSIMD.cpp b/Library/MemorySetSIMD.cpp index 033b558..a9ad6a5 100644 --- a/Library/MemorySetSIMD.cpp +++ b/Library/MemorySetSIMD.cpp @@ -5,110 +5,40 @@ #include #include -// TODO: Implement these functions properly +/* +TODO: Replace these functions with even more optimized versions. + The current versions are fast but not as fast as they could be and also we need implementation for avx, not only sse. +*/ + +// TODO: Implement these functions EXTERNC void *memset_sse(void *dest, int c, size_t n) { return memset_unsafe(dest, c, n); - char *d = (char *)dest; - - if (((uintptr_t)d & 0xF) == 0) - { - size_t num_vectors = n / 16; - for (size_t i = 0; i < num_vectors; i++) - { - asmv("movaps (%0), %%xmm0\n" - "movaps %%xmm0, (%1)\n" - : - : "r"(c), "r"(d) - : "xmm0"); - d += 16; - } - n -= num_vectors * 16; - } - - memset_unsafe(d, c, n); - return dest; } EXTERNC void *memset_sse2(void *dest, int c, size_t n) { return memset_unsafe(dest, c, n); - char *d = (char *)dest; - - if (((uintptr_t)d & 0xF) == 0) - { - size_t num_vectors = n / 16; - for (size_t i = 0; i < num_vectors; i++) - { - asmv("movdqa (%0), %%xmm0\n" - "movdqa %%xmm0, (%1)\n" - : - : "r"(c), "r"(d) - : "xmm0"); - d += 16; - } - n -= num_vectors * 16; - } - - memset_unsafe(d, c, n); - return dest; } EXTERNC void *memset_sse3(void *dest, int c, size_t n) { return memset_unsafe(dest, c, n); - char *d = (char *)dest; - - if (((uintptr_t)d & 0x7) == 0) - { - size_t num_vectors = n / 8; - for (size_t i = 0; i < num_vectors; i++) - { - asmv("movq (%0), %%xmm0\n" - "movddup %%xmm0, %%xmm1\n" - "movq %%xmm1, (%1)\n" - : - : "r"(c), "r"(d) - : "xmm0", "xmm1"); - d += 16; - } - n -= num_vectors * 16; - } - - memset_unsafe(d, c, n); - return dest; } EXTERNC void *memset_ssse3(void *dest, int c, size_t n) { return memset_unsafe(dest, c, n); - char *d = (char *)dest; - - if (((uintptr_t)d & 0xF) == 0) - { - size_t num_vectors = n / 16; - for (size_t i = 0; i < num_vectors; i++) - { - asmv("movdqa (%0), %%xmm0\n" - "movdqa 16(%0), %%xmm1\n" - "palignr $8, %%xmm0, %%xmm1\n" - "movdqa %%xmm1, (%1)\n" - : - : "r"(c), "r"(d) - : "xmm0", "xmm1"); - d += 16; - } - n -= num_vectors * 16; - } - - memset_unsafe(d, c, n); - return dest; } EXTERNC void *memset_sse4_1(void *dest, int c, size_t n) { return memset_unsafe(dest, c, n); +} + +EXTERNC void *memset_sse4_2(void *dest, int c, size_t n) +{ char *d = (char *)dest; if (((uintptr_t)d & 0xF) == 0) @@ -116,7 +46,8 @@ EXTERNC void *memset_sse4_1(void *dest, int c, size_t n) size_t num_vectors = n / 16; for (size_t i = 0; i < num_vectors; i++) { - asmv("movdqa (%0), %%xmm0\n" + asmv("movd %0, %%xmm0\n" + "pshufd $0, %%xmm0, %%xmm0\n" "movdqa %%xmm0, (%1)\n" : : "r"(c), "r"(d) @@ -129,28 +60,3 @@ EXTERNC void *memset_sse4_1(void *dest, int c, size_t n) memset_unsafe(d, c, n); return dest; } - -EXTERNC void *memset_sse4_2(void *dest, int c, size_t n) -{ - return memset_unsafe(dest, c, n); - char *d = (char *)dest; - - if (((uintptr_t)d & 0xF) == 0) - { - size_t num_vectors = n / 16; - for (size_t i = 0; i < num_vectors; i++) - { - asmv("movdqa (%0), %%xmm0\n" - "pcmpistri $0, (%0), %%xmm0\n" - "movdqa %%xmm0, (%1)\n" - : - : "r"(d), "r"(c) - : "xmm0"); - d += 16; - } - n -= num_vectors * 16; - } - - memset_unsafe(d, c, n); - return dest; -}