Update kernel

This commit is contained in:
EnderIce2
2024-01-19 06:47:42 +02:00
parent fd15592608
commit 96daa43d38
282 changed files with 25486 additions and 15700 deletions

View File

@ -24,58 +24,91 @@
/*
TODO: Replace these functions with even more optimized versions.
The current versions are fast but not as fast as they could be and also we need implementation for avx, not only sse.
The current versions are fast but not as fast as they could be and also we need implementation for avx, not only sse.
*/
// TODO: Implement these functions
EXTERNC void *memset_sse(void *dest, int c, size_t n)
{
return memset_unsafe(dest, c, n);
return memset_unsafe(dest, c, n);
}
EXTERNC void *memset_sse2(void *dest, int c, size_t n)
{
return memset_unsafe(dest, c, n);
size_t i;
if ((size_t)dest & (16 - 1))
{
i = 0;
while (((size_t)dest + i) & (16 - 1) && i < n)
{
asmv("stosb\n"
:
: "D"((size_t)dest + i),
"a"(c));
i++;
}
}
else
i = 0;
for (; i + 64 <= n; i += 64)
{
asmv("movd %0, %%xmm0\n"
"movdqa %%xmm0, (%1)\n"
"movdqa %%xmm0, 16(%1)\n"
"movdqa %%xmm0, 32(%1)\n"
"movdqa %%xmm0, 48(%1)\n"
:
: "r"(c), "r"(((size_t)dest) + i)
: "xmm0");
}
asmv("rep stosb\n" ::"a"((size_t)(c)),
"D"(((size_t)dest) + i),
"c"(n - i));
i += n - i;
return (void *)(((size_t)dest) + i);
}
EXTERNC void *memset_sse3(void *dest, int c, size_t n)
{
return memset_unsafe(dest, c, n);
return memset_sse2(dest, c, n);
}
EXTERNC void *memset_ssse3(void *dest, int c, size_t n)
{
return memset_unsafe(dest, c, n);
return memset_sse2(dest, c, n);
}
EXTERNC void *memset_sse4_1(void *dest, int c, size_t n)
{
return memset_unsafe(dest, c, n);
return memset_sse2(dest, c, n);
}
EXTERNC void *memset_sse4_2(void *dest, int c, size_t n)
{
#if defined(a64)
char *d = (char *)dest;
char *d = (char *)dest;
if (((uintptr_t)d & 0xF) == 0)
{
size_t num_vectors = n / 16;
for (size_t i = 0; i < num_vectors; i++)
{
asmv("movd %0, %%xmm0\n"
"pshufd $0, %%xmm0, %%xmm0\n"
"movdqa %%xmm0, (%1)\n"
:
: "r"(c), "r"(d)
: "xmm0");
d += 16;
}
n -= num_vectors * 16;
}
if (((uintptr_t)d & 0xF) == 0)
{
size_t num_vectors = n / 16;
for (size_t i = 0; i < num_vectors; i++)
{
asmv("movd %0, %%xmm0\n"
"pshufd $0, %%xmm0, %%xmm0\n"
"movdqa %%xmm0, (%1)\n"
:
: "r"(c), "r"(d)
: "xmm0");
d += 16;
}
n -= num_vectors * 16;
}
memset_unsafe(d, c, n);
memset_unsafe(d, c, n);
#endif
return dest;
return dest;
}