Kernel now uses SSE instructions properly

This commit is contained in:
Alex 2023-01-03 06:36:15 +02:00
parent 450fe4f0ac
commit 4501826025
Signed by untrusted user who does not match committer: enderice2
GPG Key ID: EACC3AD603BAB4DD
13 changed files with 1270 additions and 572 deletions

View File

@ -237,13 +237,42 @@ namespace APIC
this->APICBaseAddress = BaseLow << 12u | BaseHigh << 32u;
trace("APIC Address: %#lx", this->APICBaseAddress);
uint32_t rcx;
cpuid(1, 0, 0, &rcx, 0);
if (rcx & CPUID_FEAT_RCX_x2APIC)
bool x2APICSupported = false;
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0)
{
// this->x2APICSupported = true;
warn("x2APIC not supported yet.");
// wrmsr(MSR_APIC_BASE, (rdmsr(MSR_APIC_BASE) | (1 << 11)) & ~(1 << 10));
#if defined(__amd64__)
CPU::x64::AMD::CPUID0x1 cpuid1amd;
#elif defined(__i386__)
CPU::x32::AMD::CPUID0x1 cpuid1amd;
#endif
#if defined(__amd64__) || defined(__i386__)
asmv("cpuid"
: "=a"(cpuid1amd.EAX.raw), "=b"(cpuid1amd.EBX.raw), "=c"(cpuid1amd.ECX.raw), "=d"(cpuid1amd.EDX.raw)
: "a"(0x1));
#endif
// FIXME: Not sure if I configured this correctly or something else is wrong
// x2APICSupported = cpuid1amd.ECX.x2APIC;
fixme("AMD does even support x2APIC? ECX->Reserved10: %#lx", cpuid1amd.ECX.Reserved10);
}
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
{
#if defined(__amd64__)
CPU::x64::Intel::CPUID0x1 cpuid1intel;
#elif defined(__i386__)
CPU::x32::Intel::CPUID0x1 cpuid1intel;
#endif
#if defined(__amd64__) || defined(__i386__)
asmv("cpuid"
: "=a"(cpuid1intel.EAX.raw), "=b"(cpuid1intel.EBX.raw), "=c"(cpuid1intel.ECX.raw), "=d"(cpuid1intel.EDX.raw)
: "a"(0x1));
#endif
x2APICSupported = cpuid1intel.ECX.x2APIC;
}
if (x2APICSupported)
{
this->x2APICSupported = true;
wrmsr(MSR_APIC_BASE, (rdmsr(MSR_APIC_BASE) | (1 << 11)) & ~(1 << 10));
BaseStruct.EN = 1;
wrmsr(MSR_APIC_BASE, BaseStruct.raw);
}
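
For reference, in the IA32_APIC_BASE MSR bit 11 is the global APIC enable (EN) and bit 10 selects x2APIC mode (EXTD); the write above keeps bit 10 clear, so the local APIC stays in xAPIC mode for now. A minimal sketch of a full switch into x2APIC mode, following the Intel SDM rather than this kernel's code, would be:

uint64_t Base = rdmsr(MSR_APIC_BASE);
Base |= (1 << 11) | (1 << 10); /* EN (global enable) + EXTD (x2APIC mode) */
wrmsr(MSR_APIC_BASE, Base);
/* After this, local APIC registers are accessed through MSRs in the 0x800 range instead of MMIO. */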

View File

@ -53,8 +53,8 @@ CPUData *GetCurrentCPU()
extern "C" void StartCPU()
{
CPU::Interrupts(CPU::Disable);
CPU::InitializeFeatures();
uint64_t CoreID = (int)*reinterpret_cast<int *>(CORE);
CPU::InitializeFeatures(CoreID);
// Initialize GDT and IDT
Interrupts::Initialize(CoreID);
Interrupts::Enable(CoreID);

View File

@ -3,26 +3,29 @@
#include <memory.hpp>
#include <convert.h>
#include <debug.h>
#include <smp.hpp>
#include "../kernel.h"
namespace CPU
{
static bool SSEEnabled = false;
char *Vendor()
{
static char Vendor[13];
#if defined(__amd64__)
uint32_t rax, rbx, rcx, rdx;
x64::cpuid(0x0, &rax, &rbx, &rcx, &rdx);
memcpy(Vendor + 0, &rbx, 4);
memcpy(Vendor + 4, &rdx, 4);
memcpy(Vendor + 8, &rcx, 4);
uint32_t eax, ebx, ecx, edx;
x64::cpuid(0x0, &eax, &ebx, &ecx, &edx);
memcpy_unsafe(Vendor + 0, &ebx, 4);
memcpy_unsafe(Vendor + 4, &edx, 4);
memcpy_unsafe(Vendor + 8, &ecx, 4);
#elif defined(__i386__)
uint32_t rax, rbx, rcx, rdx;
x32::cpuid(0x0, &rax, &rbx, &rcx, &rdx);
memcpy(Vendor + 0, &rbx, 4);
memcpy(Vendor + 4, &rdx, 4);
memcpy(Vendor + 8, &rcx, 4);
uint32_t eax, ebx, ecx, edx;
x32::cpuid(0x0, &eax, &ebx, &ecx, &edx);
memcpy_unsafe(Vendor + 0, &ebx, 4);
memcpy_unsafe(Vendor + 4, &edx, 4);
memcpy_unsafe(Vendor + 8, &ecx, 4);
#elif defined(__aarch64__)
asmv("mrs %0, MIDR_EL1"
: "=r"(Vendor[0]));
@ -34,39 +37,39 @@ namespace CPU
{
static char Name[49];
#if defined(__amd64__)
uint32_t rax, rbx, rcx, rdx;
x64::cpuid(0x80000002, &rax, &rbx, &rcx, &rdx);
memcpy(Name + 0, &rax, 4);
memcpy(Name + 4, &rbx, 4);
memcpy(Name + 8, &rcx, 4);
memcpy(Name + 12, &rdx, 4);
x64::cpuid(0x80000003, &rax, &rbx, &rcx, &rdx);
memcpy(Name + 16, &rax, 4);
memcpy(Name + 20, &rbx, 4);
memcpy(Name + 24, &rcx, 4);
memcpy(Name + 28, &rdx, 4);
x64::cpuid(0x80000004, &rax, &rbx, &rcx, &rdx);
memcpy(Name + 32, &rax, 4);
memcpy(Name + 36, &rbx, 4);
memcpy(Name + 40, &rcx, 4);
memcpy(Name + 44, &rdx, 4);
uint32_t eax, ebx, ecx, edx;
x64::cpuid(0x80000002, &eax, &ebx, &ecx, &edx);
memcpy_unsafe(Name + 0, &eax, 4);
memcpy_unsafe(Name + 4, &ebx, 4);
memcpy_unsafe(Name + 8, &ecx, 4);
memcpy_unsafe(Name + 12, &edx, 4);
x64::cpuid(0x80000003, &eax, &ebx, &ecx, &edx);
memcpy_unsafe(Name + 16, &eax, 4);
memcpy_unsafe(Name + 20, &ebx, 4);
memcpy_unsafe(Name + 24, &ecx, 4);
memcpy_unsafe(Name + 28, &edx, 4);
x64::cpuid(0x80000004, &eax, &ebx, &ecx, &edx);
memcpy_unsafe(Name + 32, &eax, 4);
memcpy_unsafe(Name + 36, &ebx, 4);
memcpy_unsafe(Name + 40, &ecx, 4);
memcpy_unsafe(Name + 44, &edx, 4);
#elif defined(__i386__)
uint32_t rax, rbx, rcx, rdx;
x32::cpuid(0x80000002, &rax, &rbx, &rcx, &rdx);
memcpy(Name + 0, &rax, 4);
memcpy(Name + 4, &rbx, 4);
memcpy(Name + 8, &rcx, 4);
memcpy(Name + 12, &rdx, 4);
x32::cpuid(0x80000003, &rax, &rbx, &rcx, &rdx);
memcpy(Name + 16, &rax, 4);
memcpy(Name + 20, &rbx, 4);
memcpy(Name + 24, &rcx, 4);
memcpy(Name + 28, &rdx, 4);
x32::cpuid(0x80000004, &rax, &rbx, &rcx, &rdx);
memcpy(Name + 32, &rax, 4);
memcpy(Name + 36, &rbx, 4);
memcpy(Name + 40, &rcx, 4);
memcpy(Name + 44, &rdx, 4);
uint32_t eax, ebx, ecx, edx;
x32::cpuid(0x80000002, &eax, &ebx, &ecx, &edx);
memcpy_unsafe(Name + 0, &eax, 4);
memcpy_unsafe(Name + 4, &ebx, 4);
memcpy_unsafe(Name + 8, &ecx, 4);
memcpy_unsafe(Name + 12, &edx, 4);
x32::cpuid(0x80000003, &eax, &ebx, &ecx, &edx);
memcpy_unsafe(Name + 16, &eax, 4);
memcpy_unsafe(Name + 20, &ebx, 4);
memcpy_unsafe(Name + 24, &ecx, 4);
memcpy_unsafe(Name + 28, &edx, 4);
x32::cpuid(0x80000004, &eax, &ebx, &ecx, &edx);
memcpy_unsafe(Name + 32, &eax, 4);
memcpy_unsafe(Name + 36, &ebx, 4);
memcpy_unsafe(Name + 40, &ecx, 4);
memcpy_unsafe(Name + 44, &edx, 4);
#elif defined(__aarch64__)
asmv("mrs %0, MIDR_EL1"
: "=r"(Name[0]));
@ -78,17 +81,17 @@ namespace CPU
{
static char Hypervisor[13];
#if defined(__amd64__)
uint32_t rax, rbx, rcx, rdx;
x64::cpuid(0x40000000, &rax, &rbx, &rcx, &rdx);
memcpy(Hypervisor + 0, &rbx, 4);
memcpy(Hypervisor + 4, &rcx, 4);
memcpy(Hypervisor + 8, &rdx, 4);
uint32_t eax, ebx, ecx, edx;
x64::cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
memcpy_unsafe(Hypervisor + 0, &ebx, 4);
memcpy_unsafe(Hypervisor + 4, &ecx, 4);
memcpy_unsafe(Hypervisor + 8, &edx, 4);
#elif defined(__i386__)
uint32_t rax, rbx, rcx, rdx;
x64::cpuid(0x40000000, &rax, &rbx, &rcx, &rdx);
memcpy(Hypervisor + 0, &rbx, 4);
memcpy(Hypervisor + 4, &rcx, 4);
memcpy(Hypervisor + 8, &rdx, 4);
uint32_t eax, ebx, ecx, edx;
x64::cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
memcpy_unsafe(Hypervisor + 0, &ebx, 4);
memcpy_unsafe(Hypervisor + 4, &ecx, 4);
memcpy_unsafe(Hypervisor + 8, &edx, 4);
#elif defined(__aarch64__)
asmv("mrs %0, MIDR_EL1"
: "=r"(Hypervisor[0]));
@ -171,15 +174,51 @@ namespace CPU
return PT;
}
void InitializeFeatures()
void InitializeFeatures(long Core)
{
bool PGESupport = false;
bool SSESupport = false;
#if defined(__amd64__)
static int BSP = 0;
x64::CR0 cr0 = x64::readcr0();
x64::CR4 cr4 = x64::readcr4();
uint32_t rax, rbx, rcx, rdx;
x64::cpuid(0x1, &rax, &rbx, &rcx, &rdx);
if (rdx & x64::CPUID_FEAT_RDX_PGE)
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0)
{
#if defined(__amd64__)
CPU::x64::AMD::CPUID0x1 cpuid1amd;
#elif defined(__i386__)
CPU::x32::AMD::CPUID0x1 cpuid1amd;
#endif
#if defined(__amd64__) || defined(__i386__)
asmv("cpuid"
: "=a"(cpuid1amd.EAX.raw), "=b"(cpuid1amd.EBX.raw), "=c"(cpuid1amd.ECX.raw), "=d"(cpuid1amd.EDX.raw)
: "a"(0x1));
#endif
if (cpuid1amd.EDX.PGE)
PGESupport = true;
if (cpuid1amd.EDX.SSE)
SSESupport = true;
}
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
{
#if defined(__amd64__)
CPU::x64::Intel::CPUID0x1 cpuid1intel;
#elif defined(__i386__)
CPU::x32::Intel::CPUID0x1 cpuid1intel;
#endif
#if defined(__amd64__) || defined(__i386__)
asmv("cpuid"
: "=a"(cpuid1intel.EAX.raw), "=b"(cpuid1intel.EBX.raw), "=c"(cpuid1intel.ECX.raw), "=d"(cpuid1intel.EDX.raw)
: "a"(0x1));
#endif
if (cpuid1intel.EDX.PGE)
PGESupport = true;
if (cpuid1intel.EDX.SSE)
SSESupport = true;
}
if (PGESupport)
{
debug("Enabling global pages support...");
if (!BSP)
@ -187,16 +226,29 @@ namespace CPU
cr4.PGE = 1;
}
if (rdx & x64::CPUID_FEAT_RDX_SSE)
{
debug("Enabling SSE support...");
if (!BSP)
KPrint("SSE is supported.");
cr0.EM = 0;
cr0.MP = 1;
cr4.OSFXSR = 1;
cr4.OSXMMEXCPT = 1;
}
bool SSEEnableAfter = false;
if (strcmp(CPU::Hypervisor(), x86_CPUID_VENDOR_TCG) != 0) /* Not sure if my code is not working properly or something else is the issue. */
if (SSESupport)
{
debug("Enabling SSE support...");
if (!BSP)
KPrint("SSE is supported.");
cr0.EM = 0;
cr0.MP = 1;
cr4.OSFXSR = 1;
cr4.OSXMMEXCPT = 1;
CPUData *CoreData = GetCPU(Core);
CoreData->Data.FPU = (CPU::x64::FXState *)KernelAllocator.RequestPages(TO_PAGES(sizeof(CPU::x64::FXState)));
memset(CoreData->Data.FPU, 0, FROM_PAGES(TO_PAGES(sizeof(CPU::x64::FXState))));
CoreData->Data.FPU->mxcsr = 0b0001111110000000;
CoreData->Data.FPU->mxcsrmask = 0b1111111110111111;
CoreData->Data.FPU->fcw = 0b0000001100111111;
CPU::x64::fxrstor(CoreData->Data.FPU);
SSEEnableAfter = true;
}
if (!BSP)
KPrint("Enabling CPU cache.");
@ -207,24 +259,26 @@ namespace CPU
x64::writecr0(cr0);
// FIXME: I don't think this is reporting correctly. This has to be fixed asap.
debug("Enabling UMIP, SMEP & SMAP support...");
x64::cpuid(0x1, &rax, &rbx, &rcx, &rdx);
if (rdx & x64::CPUID_FEAT_RDX_UMIP) // https://en.wikipedia.org/wiki/Control_register
uint32_t eax, ebx, ecx, edx;
x64::cpuid(0x1, &eax, &ebx, &ecx, &edx);
if (edx & (1 << 2)) // https://en.wikipedia.org/wiki/Control_register
{
if (!BSP)
KPrint("UMIP is supported.");
debug("UMIP is supported.");
// cr4.UMIP = 1;
}
if (rdx & x64::CPUID_FEAT_RDX_SMEP) // https://en.wikipedia.org/wiki/Control_register#SMEP
// https://web.archive.org/web/20160312223150/http://ncsi.com/nsatc11/presentations/wednesday/emerging_technologies/fischer.pdf
if (edx & (1 << 7)) // https://en.wikipedia.org/wiki/Control_register#SMEP
// https://web.archive.org/web/20160312223150/http://ncsi.com/nsatc11/presentations/wednesday/emerging_technologies/fischer.pdf
{
if (!BSP)
KPrint("SMEP is supported.");
debug("SMEP is supported.");
// cr4.SMEP = 1;
}
if (rdx & x64::CPUID_FEAT_RDX_SMAP) // https://en.wikipedia.org/wiki/Supervisor_Mode_Access_Prevention
if (edx & (1 << 20)) // https://en.wikipedia.org/wiki/Supervisor_Mode_Access_Prevention
{
if (!BSP)
KPrint("SMAP is supported.");
@ -252,6 +306,8 @@ namespace CPU
x64::wrmsr(x64::MSR_CR_PAT, 0x6 | (0x0 << 8) | (0x1 << 16));
if (!BSP++)
trace("Features for BSP initialized.");
if (SSEEnableAfter)
SSEEnabled = true;
#elif defined(__i386__)
#elif defined(__aarch64__)
#endif
@ -276,6 +332,14 @@ namespace CPU
x86SIMDType CheckSIMD()
{
if (unlikely(!SSEEnabled))
return SIMD_NONE;
static x86SIMDType SIMDType = SIMD_NONE;
if (likely(SIMDType != SIMD_NONE))
return SIMDType;
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0)
{
#if defined(__amd64__)
@ -289,17 +353,32 @@ namespace CPU
: "a"(0x1));
#endif
if (cpuid1amd.ECX.SSE4_2)
return SIMD_SSE42;
SIMDType = SIMD_SSE42;
else if (cpuid1amd.ECX.SSE4_1)
return SIMD_SSE41;
SIMDType = SIMD_SSE41;
else if (cpuid1amd.ECX.SSE3)
return SIMD_SSE3;
SIMDType = SIMD_SSE3;
else if (cpuid1amd.EDX.SSE2)
return SIMD_SSE2;
SIMDType = SIMD_SSE2;
else if (cpuid1amd.EDX.SSE)
return SIMD_SSE;
SIMDType = SIMD_SSE;
#ifdef DEBUG
if (cpuid1amd.ECX.SSE4_2)
debug("SSE4.2 is supported.");
if (cpuid1amd.ECX.SSE4_1)
debug("SSE4.1 is supported.");
if (cpuid1amd.ECX.SSE3)
debug("SSE3 is supported.");
if (cpuid1amd.EDX.SSE2)
debug("SSE2 is supported.");
if (cpuid1amd.EDX.SSE)
debug("SSE is supported.");
#endif
return SIMDType;
}
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
{
#if defined(__amd64__)
CPU::x64::Intel::CPUID0x1 cpuid1intel;
@ -312,15 +391,30 @@ namespace CPU
: "a"(0x1));
#endif
if (cpuid1intel.ECX.SSE4_2)
return SIMD_SSE42;
SIMDType = SIMD_SSE42;
else if (cpuid1intel.ECX.SSE4_1)
return SIMD_SSE41;
SIMDType = SIMD_SSE41;
else if (cpuid1intel.ECX.SSE3)
return SIMD_SSE3;
SIMDType = SIMD_SSE3;
else if (cpuid1intel.EDX.SSE2)
return SIMD_SSE2;
SIMDType = SIMD_SSE2;
else if (cpuid1intel.EDX.SSE)
return SIMD_SSE;
SIMDType = SIMD_SSE;
#ifdef DEBUG
if (cpuid1intel.ECX.SSE4_2)
debug("SSE4.2 is supported.");
if (cpuid1intel.ECX.SSE4_1)
debug("SSE4.1 is supported.");
if (cpuid1intel.ECX.SSE3)
debug("SSE3 is supported.");
if (cpuid1intel.EDX.SSE2)
debug("SSE2 is supported.");
if (cpuid1intel.EDX.SSE)
debug("SSE is supported.");
#endif
return SIMDType;
}
return SIMD_NONE;
@ -328,6 +422,9 @@ namespace CPU
bool CheckSIMD(x86SIMDType Type)
{
if (unlikely(!SSEEnabled))
return false;
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0)
{
#if defined(__amd64__)
@ -351,7 +448,7 @@ namespace CPU
else if (Type == SIMD_SSE)
return cpuid1amd.EDX.SSE;
}
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
{
#if defined(__amd64__)
CPU::x64::Intel::CPUID0x1 cpuid1intel;
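
The vendor-gated CPUID leaf 0x1 query above is repeated almost verbatim in the APIC, random and tasking changes of this commit. A hedged sketch of how it could be folded into one helper (the helper itself is hypothetical; the CPUID0x1 unions and the asmv macro are the ones this code already uses):

template <typename Leaf1>
static inline void QueryLeaf1(Leaf1 &Registers)
{
    /* Fills the vendor-specific CPUID0x1 union (AMD or Intel flavor) from CPUID leaf 0x1. */
    asmv("cpuid"
         : "=a"(Registers.EAX.raw), "=b"(Registers.EBX.raw), "=c"(Registers.ECX.raw), "=d"(Registers.EDX.raw)
         : "a"(0x1));
}

Used as, for example, CPU::x64::AMD::CPUID0x1 cpuid1amd; QueryLeaf1(cpuid1amd); before testing cpuid1amd.ECX.SSE4_2 and the rest.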

View File

@ -22,7 +22,7 @@ namespace Random
#endif
RDRANDFlag = cpuid1amd.ECX.RDRAND;
}
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
{
#if defined(__amd64__)
CPU::x64::Intel::CPUID0x1 cpuid1intel;
@ -71,7 +71,7 @@ namespace Random
#endif
RDRANDFlag = cpuid1amd.ECX.RDRAND;
}
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
{
#if defined(__amd64__)
CPU::x64::Intel::CPUID0x1 cpuid1intel;
@ -120,7 +120,7 @@ namespace Random
#endif
RDRANDFlag = cpuid1amd.ECX.RDRAND;
}
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
{
#if defined(__amd64__)
CPU::x64::Intel::CPUID0x1 cpuid1intel;
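
Once ECX.RDRAND is confirmed as above, the instruction itself reports success through the carry flag, so callers are expected to retry and eventually fall back; a minimal sketch (not the generator code in this file):

uint64_t Value;
uint8_t Success;
asmv("rdrand %0\n\t"
     "setc %1"
     : "=r"(Value), "=q"(Success));
if (!Success)
{
    /* Retry a bounded number of times, then fall back to a software PRNG. */
}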

View File

@ -129,24 +129,6 @@ namespace GraphicalUserInterface
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
};
uint32_t DesktopFadeEffect[] = {
0xFF000000,
0x010101,
0x040404,
0x080808,
0x101010,
0x121212,
0x151515,
0x181818,
0x1A1A1A,
0x1D1D1D,
0x1F1F1F,
0x222222,
0x242424,
0x262626,
0x282828,
};
char CloseButton[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
@ -262,22 +244,6 @@ namespace GraphicalUserInterface
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
};
uint32_t CloseButtonFade[] = {
0x404040,
0x770000,
0x990000,
0xBB0000,
0xDD0000,
0xFF0000,
};
uint32_t MaximizeMinimizeButtonFade[] = {
0x404040,
0x454545,
0x505050,
0x5F5F5F,
};
O1 void GUI::FetchInputs()
{
KernelCallback callback;

View File

@ -35,6 +35,12 @@
* - General:
* https://wiki.osdev.org/Main_Page
*
* - CPU XCR0 structure:
* https://wiki.osdev.org/CPU_Registers_x86#XCR0
*
* - CPUID 0x7:
* https://en.wikipedia.org/wiki/CPUID
*
* - Network:
* https://web.archive.org/web/20051210132103/http://users.pcnet.ro/dmoroian/beej/Beej.html
* https://web.archive.org/web/20060229214053/http://www.cs.rutgers.edu/~pxk/417/notes/sockets/udp.html
@ -123,7 +129,7 @@ EXTERNC __no_instrument_function void Main(BootInfo *Info)
KPrint("Initializing GDT and IDT");
Interrupts::Initialize(0);
KPrint("Initializing CPU Features");
CPU::InitializeFeatures();
CPU::InitializeFeatures(0);
KPrint("Loading Kernel Symbols");
KernelSymbolTable = new SymbolResolver::Symbols((uintptr_t)Info->Kernel.FileBase);
KPrint("Reading Kernel Parameters");

View File

@ -3,346 +3,191 @@
#include <memory.hpp>
#include <limits.h>
#include <debug.h>
#include <cpu.hpp>
// TODO: Replace mem* with assembly code
/* Some of the functions are from musl library */
/* https://www.musl-libc.org/ */
/*
Copyright © 2005-2020 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
void *memcpy_unsafe(void *dest, const void *src, size_t n)
EXTERNC void *memcpy_sse(void *dest, const void *src, size_t n)
{
unsigned char *d = dest;
const unsigned char *s = src;
char *d = (char *)dest;
const char *s = (const char *)src;
#ifdef __GNUC__
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LS >>
#define RS <<
#else
#define LS <<
#define RS >>
#endif
typedef uint32_t __attribute__((__may_alias__)) u32;
uint32_t w, x;
for (; (uintptr_t)s % 4 && n; n--)
*d++ = *s++;
if ((uintptr_t)d % 4 == 0)
if ((((uintptr_t)d | (uintptr_t)s) & 0xF) == 0)
{
for (; n >= 16; s += 16, d += 16, n -= 16)
size_t num_vectors = n / 16;
for (size_t i = 0; i < num_vectors; i++)
{
*(u32 *)(d + 0) = *(u32 *)(s + 0);
*(u32 *)(d + 4) = *(u32 *)(s + 4);
*(u32 *)(d + 8) = *(u32 *)(s + 8);
*(u32 *)(d + 12) = *(u32 *)(s + 12);
asmv("movaps (%0), %%xmm0\n"
"movaps %%xmm0, (%1)\n"
:
: "r"(s), "r"(d)
: "xmm0");
d += 16;
s += 16;
}
if (n & 8)
n -= num_vectors * 16;
}
memcpy_unsafe(d, s, n);
return dest;
}
EXTERNC void *memcpy_sse2(void *dest, const void *src, size_t n)
{
char *d = (char *)dest;
const char *s = (const char *)src;
if ((((uintptr_t)d | (uintptr_t)s) & 0xF) == 0)
{
size_t num_vectors = n / 16;
for (size_t i = 0; i < num_vectors; i++)
{
*(u32 *)(d + 0) = *(u32 *)(s + 0);
*(u32 *)(d + 4) = *(u32 *)(s + 4);
asmv("movdqa (%0), %%xmm0\n"
"movdqa %%xmm0, (%1)\n"
:
: "r"(s), "r"(d)
: "xmm0");
d += 16;
s += 16;
}
n -= num_vectors * 16;
}
memcpy_unsafe(d, s, n);
return dest;
}
EXTERNC void *memcpy_sse3(void *dest, const void *src, size_t n)
{
char *d = (char *)dest;
const char *s = (const char *)src;
if ((((uintptr_t)d | (uintptr_t)s) & 0x7) == 0)
{
size_t num_vectors = n / 8;
for (size_t i = 0; i < num_vectors; i++)
{
asmv("movq (%0), %%xmm0\n"
"movddup %%xmm0, %%xmm1\n"
"movq %%xmm1, (%1)\n"
:
: "r"(s), "r"(d)
: "xmm0", "xmm1");
d += 8;
s += 8;
}
if (n & 4)
{
*(u32 *)(d + 0) = *(u32 *)(s + 0);
d += 4;
s += 4;
}
if (n & 2)
{
*d++ = *s++;
*d++ = *s++;
}
if (n & 1)
{
*d = *s;
}
return dest;
n -= num_vectors * 8;
}
if (n >= 32)
switch ((uintptr_t)d % 4)
{
case 1:
w = *(u32 *)s;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
n -= 3;
for (; n >= 17; s += 16, d += 16, n -= 16)
{
x = *(u32 *)(s + 1);
*(u32 *)(d + 0) = (w LS 24) | (x RS 8);
w = *(u32 *)(s + 5);
*(u32 *)(d + 4) = (x LS 24) | (w RS 8);
x = *(u32 *)(s + 9);
*(u32 *)(d + 8) = (w LS 24) | (x RS 8);
w = *(u32 *)(s + 13);
*(u32 *)(d + 12) = (x LS 24) | (w RS 8);
}
break;
case 2:
w = *(u32 *)s;
*d++ = *s++;
*d++ = *s++;
n -= 2;
for (; n >= 18; s += 16, d += 16, n -= 16)
{
x = *(u32 *)(s + 2);
*(u32 *)(d + 0) = (w LS 16) | (x RS 16);
w = *(u32 *)(s + 6);
*(u32 *)(d + 4) = (x LS 16) | (w RS 16);
x = *(u32 *)(s + 10);
*(u32 *)(d + 8) = (w LS 16) | (x RS 16);
w = *(u32 *)(s + 14);
*(u32 *)(d + 12) = (x LS 16) | (w RS 16);
}
break;
case 3:
w = *(u32 *)s;
*d++ = *s++;
n -= 1;
for (; n >= 19; s += 16, d += 16, n -= 16)
{
x = *(u32 *)(s + 3);
*(u32 *)(d + 0) = (w LS 8) | (x RS 24);
w = *(u32 *)(s + 7);
*(u32 *)(d + 4) = (x LS 8) | (w RS 24);
x = *(u32 *)(s + 11);
*(u32 *)(d + 8) = (w LS 8) | (x RS 24);
w = *(u32 *)(s + 15);
*(u32 *)(d + 12) = (x LS 8) | (w RS 24);
}
break;
}
if (n & 16)
{
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
}
if (n & 8)
{
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
}
if (n & 4)
{
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
}
if (n & 2)
{
*d++ = *s++;
*d++ = *s++;
}
if (n & 1)
{
*d = *s;
}
return dest;
#endif
for (; n; n--)
*d++ = *s++;
memcpy_unsafe(d, s, n);
return dest;
}
void *memset_unsafe(void *dest, int c, size_t n)
EXTERNC void *memcpy_ssse3(void *dest, const void *src, size_t n)
{
unsigned char *s = dest;
size_t k;
char *d = (char *)dest;
const char *s = (const char *)src;
if (!n)
return dest;
s[0] = c;
s[n - 1] = c;
if (n <= 2)
return dest;
s[1] = c;
s[2] = c;
s[n - 2] = c;
s[n - 3] = c;
if (n <= 6)
return dest;
s[3] = c;
s[n - 4] = c;
if (n <= 8)
return dest;
k = -(uintptr_t)s & 3;
s += k;
n -= k;
n &= -4;
#ifdef __GNUC__
typedef uint32_t __attribute__((__may_alias__)) u32;
typedef uint64_t __attribute__((__may_alias__)) u64;
u32 c32 = ((u32)-1) / 255 * (unsigned char)c;
*(u32 *)(s + 0) = c32;
*(u32 *)(s + n - 4) = c32;
if (n <= 8)
return dest;
*(u32 *)(s + 4) = c32;
*(u32 *)(s + 8) = c32;
*(u32 *)(s + n - 12) = c32;
*(u32 *)(s + n - 8) = c32;
if (n <= 24)
return dest;
*(u32 *)(s + 12) = c32;
*(u32 *)(s + 16) = c32;
*(u32 *)(s + 20) = c32;
*(u32 *)(s + 24) = c32;
*(u32 *)(s + n - 28) = c32;
*(u32 *)(s + n - 24) = c32;
*(u32 *)(s + n - 20) = c32;
*(u32 *)(s + n - 16) = c32;
k = 24 + ((uintptr_t)s & 4);
s += k;
n -= k;
u64 c64 = c32 | ((u64)c32 << 32);
for (; n >= 32; n -= 32, s += 32)
if ((((uintptr_t)d | (uintptr_t)s) & 0xF) == 0)
{
*(u64 *)(s + 0) = c64;
*(u64 *)(s + 8) = c64;
*(u64 *)(s + 16) = c64;
*(u64 *)(s + 24) = c64;
}
#else
for (; n; n--, s++)
*s = c;
#endif
size_t num_vectors = n / 16;
for (size_t i = 0; i < num_vectors; i++)
{
asmv("movdqa (%0), %%xmm0\n"
"movdqa 16(%0), %%xmm1\n"
"palignr $8, %%xmm0, %%xmm1\n"
"movdqa %%xmm1, (%1)\n"
:
: "r"(s), "r"(d)
: "xmm0", "xmm1");
d += 16;
s += 16;
}
n -= num_vectors * 16;
}
memcpy_unsafe(d, s, n);
return dest;
}
void *memmove_unsafe(void *dest, const void *src, size_t n)
EXTERNC void *memcpy_sse4_1(void *dest, const void *src, size_t n)
{
#ifdef __GNUC__
typedef __attribute__((__may_alias__)) size_t WT;
#define WS (sizeof(WT))
#endif
CPU::__m128i *d = (CPU::__m128i *)dest;
const CPU::__m128i *s = (const CPU::__m128i *)src;
char *d = dest;
const char *s = src;
if (d == s)
return d;
if ((uintptr_t)s - (uintptr_t)d - n <= -2 * n)
return memcpy(d, s, n);
if (d < s)
if ((((uintptr_t)d | (uintptr_t)s) & 0xF) == 0)
{
#ifdef __GNUC__
if ((uintptr_t)s % WS == (uintptr_t)d % WS)
size_t num_vectors = n / 16;
for (size_t i = 0; i < num_vectors; i++)
{
while ((uintptr_t)d % WS)
{
if (!n--)
return dest;
*d++ = *s++;
}
for (; n >= WS; n -= WS, d += WS, s += WS)
*(WT *)d = *(WT *)s;
// movntdqa
asmv("movdqa (%0), %%xmm0\n"
"movdqa %%xmm0, (%1)\n"
:
: "r"(s), "r"(d)
: "xmm0");
d += 16;
s += 16;
}
#endif
for (; n; n--)
*d++ = *s++;
}
else
{
#ifdef __GNUC__
if ((uintptr_t)s % WS == (uintptr_t)d % WS)
{
while ((uintptr_t)(d + n) % WS)
{
if (!n--)
return dest;
d[n] = s[n];
}
while (n >= WS)
n -= WS, *(WT *)(d + n) = *(WT *)(s + n);
}
#endif
while (n)
n--, d[n] = s[n];
n -= num_vectors * 16;
}
memcpy_unsafe(d, s, n);
return dest;
}
int memcmp(const void *vl, const void *vr, size_t n)
EXTERNC void *memcpy_sse4_2(void *dest, const void *src, size_t n)
{
const unsigned char *l = vl, *r = vr;
char *d = (char *)dest;
const char *s = (const char *)src;
if ((((uintptr_t)d | (uintptr_t)s) & 0xF) == 0)
{
size_t num_vectors = n / 16;
for (size_t i = 0; i < num_vectors; i++)
{
asmv("movdqa (%0), %%xmm0\n"
"pcmpistri $0, (%0), %%xmm0\n"
"movdqa %%xmm0, (%1)\n"
:
: "r"(s), "r"(d)
: "xmm0");
d += 16;
s += 16;
}
n -= num_vectors * 16;
}
memcpy_unsafe(d, s, n);
return dest;
}
EXTERNC int memcmp(const void *vl, const void *vr, size_t n)
{
const unsigned char *l = (unsigned char *)vl, *r = (unsigned char *)vr;
for (; n && *l == *r; n--, l++, r++)
;
return n ? *l - *r : 0;
}
void backspace(char s[])
EXTERNC void backspace(char s[])
{
int len = strlen(s);
s[len - 1] = '\0';
}
void append(char s[], char n)
EXTERNC void append(char s[], char n)
{
int len = strlen(s);
s[len] = n;
s[len + 1] = '\0';
}
int strncmp(const char *s1, const char *s2, size_t n)
EXTERNC int strncmp(const char *s1, const char *s2, size_t n)
{
for (size_t i = 0; i < n; i++)
{
@ -355,7 +200,7 @@ int strncmp(const char *s1, const char *s2, size_t n)
return 0;
}
long unsigned strlen(const char s[])
EXTERNC long unsigned strlen(const char s[])
{
long unsigned i = 0;
if (s)
@ -364,7 +209,7 @@ long unsigned strlen(const char s[])
return i;
}
char *strcat_unsafe(char *destination, const char *source)
EXTERNC char *strcat_unsafe(char *destination, const char *source)
{
if ((destination == NULL) || (source == NULL))
return NULL;
@ -381,7 +226,7 @@ char *strcat_unsafe(char *destination, const char *source)
return destination;
}
char *strcpy_unsafe(char *destination, const char *source)
EXTERNC char *strcpy_unsafe(char *destination, const char *source)
{
if (destination == NULL)
return NULL;
@ -396,7 +241,7 @@ char *strcpy_unsafe(char *destination, const char *source)
return ptr;
}
char *strncpy(char *destination, const char *source, unsigned long num)
EXTERNC char *strncpy(char *destination, const char *source, unsigned long num)
{
if (destination == NULL)
return NULL;
@ -411,14 +256,14 @@ char *strncpy(char *destination, const char *source, unsigned long num)
return ptr;
}
int strcmp(const char *l, const char *r)
EXTERNC int strcmp(const char *l, const char *r)
{
for (; *l == *r && *l; l++, r++)
;
return *(unsigned char *)l - *(unsigned char *)r;
}
char *strstr(const char *haystack, const char *needle)
EXTERNC char *strstr(const char *haystack, const char *needle)
{
const char *a = haystack, *b = needle;
while (1)
@ -435,7 +280,7 @@ char *strstr(const char *haystack, const char *needle)
}
}
char *strchr(const char *String, int Char)
EXTERNC char *strchr(const char *String, int Char)
{
while (*String != (char)Char)
{
@ -445,24 +290,24 @@ char *strchr(const char *String, int Char)
return (char *)String;
}
char *strdup(const char *String)
EXTERNC char *strdup(const char *String)
{
char *OutBuffer = kmalloc(strlen((char *)String) + 1);
char *OutBuffer = (char *)kmalloc(strlen((char *)String) + 1);
strncpy(OutBuffer, String, strlen(String) + 1);
return OutBuffer;
}
int isalpha(int c)
EXTERNC int isalpha(int c)
{
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
int isupper(int c)
EXTERNC int isupper(int c)
{
return (c >= 'A' && c <= 'Z');
}
long int strtol(const char *str, char **endptr, int base)
EXTERNC long int strtol(const char *str, char **endptr, int base)
{
const char *s;
long acc, cutoff;
@ -527,7 +372,7 @@ long int strtol(const char *str, char **endptr, int base)
return (acc);
}
unsigned long int strtoul(const char *str, char **endptr, int base)
EXTERNC unsigned long int strtoul(const char *str, char **endptr, int base)
{
const char *s;
unsigned long acc, cutoff;
@ -592,17 +437,17 @@ unsigned long int strtoul(const char *str, char **endptr, int base)
return (acc);
}
int isdigit(int c)
EXTERNC int isdigit(int c)
{
return c >= '0' && c <= '9';
}
int isspace(int c)
EXTERNC int isspace(int c)
{
return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == '\v';
}
int isempty(char *str)
EXTERNC int isempty(char *str)
{
if (strlen(str) == 0)
return 1;
@ -615,7 +460,7 @@ int isempty(char *str)
return 1;
}
unsigned int isdelim(char c, char *delim)
EXTERNC unsigned int isdelim(char c, char *delim)
{
while (*delim != '\0')
{
@ -626,23 +471,23 @@ unsigned int isdelim(char c, char *delim)
return 0;
}
int abs(int i) { return i < 0 ? -i : i; }
EXTERNC int abs(int i) { return i < 0 ? -i : i; }
void swap(char *x, char *y)
EXTERNC void swap(char *x, char *y)
{
char t = *x;
*x = *y;
*y = t;
}
char *reverse(char *Buffer, int i, int j)
EXTERNC char *reverse(char *Buffer, int i, int j)
{
while (i < j)
swap(&Buffer[i++], &Buffer[j--]);
return Buffer;
}
float sqrtf(float x)
EXTERNC float sqrtf(float x)
{
if (x < 0.0f)
return NAN;
@ -660,7 +505,7 @@ float sqrtf(float x)
return guess;
}
double clamp(double x, double low, double high)
EXTERNC double clamp(double x, double low, double high)
{
if (x < low)
return low;
@ -670,25 +515,25 @@ double clamp(double x, double low, double high)
return x;
}
float lerp(float a, float b, float t)
EXTERNC float lerp(float a, float b, float t)
{
return (1 - t) * a + t * b;
}
float smoothstep(float a, float b, float t)
EXTERNC float smoothstep(float a, float b, float t)
{
t = clamp(t, 0.0, 1.0);
return lerp(a, b, t * t * (3 - 2 * t));
}
float cubicInterpolate(float a, float b, float t)
EXTERNC float cubicInterpolate(float a, float b, float t)
{
float t2 = t * t;
float t3 = t2 * t;
return a + (-2 * t3 + 3 * t2) * b;
}
char *strtok(char *src, const char *delim)
EXTERNC char *strtok(char *src, const char *delim)
{
static char *src1;
if (!src)
@ -728,7 +573,7 @@ char *strtok(char *src, const char *delim)
return NULL;
}
int atoi(const char *String)
EXTERNC int atoi(const char *String)
{
uint64_t Length = strlen((char *)String);
uint64_t OutBuffer = 0;
@ -741,7 +586,7 @@ int atoi(const char *String)
return OutBuffer;
}
double atof(const char *String)
EXTERNC double atof(const char *String)
{
// Originally from https://github.com/GaloisInc/minlibc/blob/master/atof.c
/*
@ -823,7 +668,7 @@ double atof(const char *String)
return a;
}
char *itoa(int Value, char *Buffer, int Base)
EXTERNC char *itoa(int Value, char *Buffer, int Base)
{
if (Base < 2 || Base > 32)
return Buffer;
@ -851,7 +696,7 @@ char *itoa(int Value, char *Buffer, int Base)
return reverse(Buffer, 0, i - 1);
}
char *ltoa(long Value, char *Buffer, int Base)
EXTERNC char *ltoa(long Value, char *Buffer, int Base)
{
if (Base < 2 || Base > 32)
return Buffer;
@ -879,7 +724,7 @@ char *ltoa(long Value, char *Buffer, int Base)
return reverse(Buffer, 0, i - 1);
}
char *ultoa(unsigned long Value, char *Buffer, int Base)
EXTERNC char *ultoa(unsigned long Value, char *Buffer, int Base)
{
if (Base < 2 || Base > 32)
return Buffer;
@ -904,7 +749,7 @@ char *ultoa(unsigned long Value, char *Buffer, int Base)
return reverse(Buffer, 0, i - 1);
}
extern void __chk_fail(void) __attribute__((__noreturn__));
EXTERNC void __chk_fail(void) __attribute__((__noreturn__));
__noreturn static inline void __convert_chk_fail(void)
{
@ -918,7 +763,7 @@ __noreturn static inline void __convert_chk_fail(void)
// #define DBG_CHK 1
__no_stack_protector void *__memcpy_chk(void *dest, const void *src, size_t len, size_t slen)
EXTERNC __no_stack_protector void *__memcpy_chk(void *dest, const void *src, size_t len, size_t slen)
{
#ifdef DBG_CHK
debug("( dest:%#lx src:%#lx len:%llu slen:%llu )", dest, src, len, slen);
@ -949,10 +794,36 @@ __no_stack_protector void *__memcpy_chk(void *dest, const void *src, size_t len,
if (unlikely(len > slen))
__chk_fail();
return memcpy_unsafe(dest, src, len);
switch (CPU::CheckSIMD())
{
case CPU::x86SIMDType::SIMD_SSE:
return memcpy_sse(dest, src, len);
break;
case CPU::x86SIMDType::SIMD_SSE2:
return memcpy_sse2(dest, src, len);
break;
case CPU::x86SIMDType::SIMD_SSE3:
return memcpy_sse3(dest, src, len);
break;
case CPU::x86SIMDType::SIMD_SSSE3:
return memcpy_ssse3(dest, src, len);
break;
case CPU::x86SIMDType::SIMD_SSE41:
return memcpy_sse4_1(dest, src, len);
break;
case CPU::x86SIMDType::SIMD_SSE42:
return memcpy_sse4_2(dest, src, len);
break;
default:
return memcpy_unsafe(dest, src, len);
break;
}
error("Should not be here!");
CPU::Stop();
}
__no_stack_protector void *__memset_chk(void *dest, int val, size_t len, size_t slen)
EXTERNC __no_stack_protector void *__memset_chk(void *dest, int val, size_t len, size_t slen)
{
#ifdef DBG_CHK
debug("( dest:%#lx val:%#x len:%llu slen:%llu )", dest, val, len, slen);
@ -980,7 +851,7 @@ __no_stack_protector void *__memset_chk(void *dest, int val, size_t len, size_t
return memset_unsafe(dest, val, len);
}
__no_stack_protector void *__memmove_chk(void *dest, const void *src, size_t len, size_t slen)
EXTERNC __no_stack_protector void *__memmove_chk(void *dest, const void *src, size_t len, size_t slen)
{
#ifdef DBG_CHK
debug("( dest:%#lx src:%#lx len:%llu slen:%llu )", dest, src, len, slen);
@ -1014,7 +885,7 @@ __no_stack_protector void *__memmove_chk(void *dest, const void *src, size_t len
return memmove_unsafe(dest, src, len);
}
__no_stack_protector char *__strcat_chk(char *dest, const char *src, size_t slen)
EXTERNC __no_stack_protector char *__strcat_chk(char *dest, const char *src, size_t slen)
{
#ifdef DBG_CHK
debug("( dest:%#lx src:%#lx slen:%llu )", dest, src, slen);
@ -1043,7 +914,7 @@ __no_stack_protector char *__strcat_chk(char *dest, const char *src, size_t slen
return strcat_unsafe(dest, src);
}
__no_stack_protector char *__strcpy_chk(char *dest, const char *src, size_t slen)
EXTERNC __no_stack_protector char *__strcpy_chk(char *dest, const char *src, size_t slen)
{
#ifdef DBG_CHK
debug("( dest:%#lx src:%#lx slen:%llu )", dest, src, slen);
@ -1070,5 +941,6 @@ __no_stack_protector char *__strcpy_chk(char *dest, const char *src, size_t slen
if (unlikely(len >= slen))
__chk_fail();
return strcpy_unsafe(dest, src);
}
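
The __*_chk entry points above are what FORTIFY_SOURCE-style builds call in place of plain memcpy/memset/memmove, which is how the new SIMD dispatch lands on the ordinary copy path. A hedged sketch of the usual wrapper macro (the exact definition used by this repository is an assumption; it is not visible in this diff):

/* Hypothetical fortified wrapper: routes every memcpy call through __memcpy_chk. */
#define memcpy(dest, src, len) \
    __memcpy_chk((dest), (src), (len), __builtin_object_size((dest), 0))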

Library/memcpy.c (new file, 320 lines)
View File

@ -0,0 +1,320 @@
#include <convert.h>
#include <memory.hpp>
#include <limits.h>
#include <debug.h>
/* Some of the functions are from musl library */
/* https://www.musl-libc.org/ */
/*
Copyright © 2005-2020 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
void *memcpy_unsafe(void *dest, const void *src, size_t n)
{
unsigned char *d = dest;
const unsigned char *s = src;
#ifdef __GNUC__
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LS >>
#define RS <<
#else
#define LS <<
#define RS >>
#endif
typedef uint32_t __attribute__((__may_alias__)) u32;
uint32_t w, x;
for (; (uintptr_t)s % 4 && n; n--)
*d++ = *s++;
if ((uintptr_t)d % 4 == 0)
{
for (; n >= 16; s += 16, d += 16, n -= 16)
{
*(u32 *)(d + 0) = *(u32 *)(s + 0);
*(u32 *)(d + 4) = *(u32 *)(s + 4);
*(u32 *)(d + 8) = *(u32 *)(s + 8);
*(u32 *)(d + 12) = *(u32 *)(s + 12);
}
if (n & 8)
{
*(u32 *)(d + 0) = *(u32 *)(s + 0);
*(u32 *)(d + 4) = *(u32 *)(s + 4);
d += 8;
s += 8;
}
if (n & 4)
{
*(u32 *)(d + 0) = *(u32 *)(s + 0);
d += 4;
s += 4;
}
if (n & 2)
{
*d++ = *s++;
*d++ = *s++;
}
if (n & 1)
{
*d = *s;
}
return dest;
}
if (n >= 32)
switch ((uintptr_t)d % 4)
{
case 1:
w = *(u32 *)s;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
n -= 3;
for (; n >= 17; s += 16, d += 16, n -= 16)
{
x = *(u32 *)(s + 1);
*(u32 *)(d + 0) = (w LS 24) | (x RS 8);
w = *(u32 *)(s + 5);
*(u32 *)(d + 4) = (x LS 24) | (w RS 8);
x = *(u32 *)(s + 9);
*(u32 *)(d + 8) = (w LS 24) | (x RS 8);
w = *(u32 *)(s + 13);
*(u32 *)(d + 12) = (x LS 24) | (w RS 8);
}
break;
case 2:
w = *(u32 *)s;
*d++ = *s++;
*d++ = *s++;
n -= 2;
for (; n >= 18; s += 16, d += 16, n -= 16)
{
x = *(u32 *)(s + 2);
*(u32 *)(d + 0) = (w LS 16) | (x RS 16);
w = *(u32 *)(s + 6);
*(u32 *)(d + 4) = (x LS 16) | (w RS 16);
x = *(u32 *)(s + 10);
*(u32 *)(d + 8) = (w LS 16) | (x RS 16);
w = *(u32 *)(s + 14);
*(u32 *)(d + 12) = (x LS 16) | (w RS 16);
}
break;
case 3:
w = *(u32 *)s;
*d++ = *s++;
n -= 1;
for (; n >= 19; s += 16, d += 16, n -= 16)
{
x = *(u32 *)(s + 3);
*(u32 *)(d + 0) = (w LS 8) | (x RS 24);
w = *(u32 *)(s + 7);
*(u32 *)(d + 4) = (x LS 8) | (w RS 24);
x = *(u32 *)(s + 11);
*(u32 *)(d + 8) = (w LS 8) | (x RS 24);
w = *(u32 *)(s + 15);
*(u32 *)(d + 12) = (x LS 8) | (w RS 24);
}
break;
}
if (n & 16)
{
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
}
if (n & 8)
{
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
}
if (n & 4)
{
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
}
if (n & 2)
{
*d++ = *s++;
*d++ = *s++;
}
if (n & 1)
{
*d = *s;
}
return dest;
#endif
for (; n; n--)
*d++ = *s++;
return dest;
}
void *memset_unsafe(void *dest, int c, size_t n)
{
unsigned char *s = dest;
size_t k;
if (!n)
return dest;
s[0] = c;
s[n - 1] = c;
if (n <= 2)
return dest;
s[1] = c;
s[2] = c;
s[n - 2] = c;
s[n - 3] = c;
if (n <= 6)
return dest;
s[3] = c;
s[n - 4] = c;
if (n <= 8)
return dest;
k = -(uintptr_t)s & 3;
s += k;
n -= k;
n &= -4;
#ifdef __GNUC__
typedef uint32_t __attribute__((__may_alias__)) u32;
typedef uint64_t __attribute__((__may_alias__)) u64;
u32 c32 = ((u32)-1) / 255 * (unsigned char)c;
*(u32 *)(s + 0) = c32;
*(u32 *)(s + n - 4) = c32;
if (n <= 8)
return dest;
*(u32 *)(s + 4) = c32;
*(u32 *)(s + 8) = c32;
*(u32 *)(s + n - 12) = c32;
*(u32 *)(s + n - 8) = c32;
if (n <= 24)
return dest;
*(u32 *)(s + 12) = c32;
*(u32 *)(s + 16) = c32;
*(u32 *)(s + 20) = c32;
*(u32 *)(s + 24) = c32;
*(u32 *)(s + n - 28) = c32;
*(u32 *)(s + n - 24) = c32;
*(u32 *)(s + n - 20) = c32;
*(u32 *)(s + n - 16) = c32;
k = 24 + ((uintptr_t)s & 4);
s += k;
n -= k;
u64 c64 = c32 | ((u64)c32 << 32);
for (; n >= 32; n -= 32, s += 32)
{
*(u64 *)(s + 0) = c64;
*(u64 *)(s + 8) = c64;
*(u64 *)(s + 16) = c64;
*(u64 *)(s + 24) = c64;
}
#else
for (; n; n--, s++)
*s = c;
#endif
return dest;
}
void *memmove_unsafe(void *dest, const void *src, size_t n)
{
#ifdef __GNUC__
typedef __attribute__((__may_alias__)) size_t WT;
#define WS (sizeof(WT))
#endif
char *d = dest;
const char *s = src;
if (d == s)
return d;
if ((uintptr_t)s - (uintptr_t)d - n <= -2 * n)
return memcpy(d, s, n);
if (d < s)
{
#ifdef __GNUC__
if ((uintptr_t)s % WS == (uintptr_t)d % WS)
{
while ((uintptr_t)d % WS)
{
if (!n--)
return dest;
*d++ = *s++;
}
for (; n >= WS; n -= WS, d += WS, s += WS)
*(WT *)d = *(WT *)s;
}
#endif
for (; n; n--)
*d++ = *s++;
}
else
{
#ifdef __GNUC__
if ((uintptr_t)s % WS == (uintptr_t)d % WS)
{
while ((uintptr_t)(d + n) % WS)
{
if (!n--)
return dest;
d[n] = s[n];
}
while (n >= WS)
n -= WS, *(WT *)(d + n) = *(WT *)(s + n);
}
#endif
while (n)
n--, d[n] = s[n];
}
return dest;
}

View File

@ -788,8 +788,8 @@ namespace Tasking
Thread->ExitCode = 0xdead;
Thread->Status = TaskStatus::Ready;
Thread->Memory = new Memory::MemMgr(Parent->PageTable);
Thread->FPU = (FXState *)Thread->Memory->RequestPages(TO_PAGES(sizeof(FXState)));
memset(Thread->FPU, 0, FROM_PAGES(TO_PAGES(sizeof(FXState))));
Thread->FPU = (CPU::x64::FXState *)Thread->Memory->RequestPages(TO_PAGES(sizeof(CPU::x64::FXState)));
memset(Thread->FPU, 0, FROM_PAGES(TO_PAGES(sizeof(CPU::x64::FXState))));
// TODO: Is it really a good idea to use the FPU in kernel mode?
Thread->FPU->mxcsr = 0b0001111110000000;
@ -1169,10 +1169,37 @@ namespace Tasking
debug("Created Kernel Process: %s and Thread: %s", kproc->Name, kthrd->Name);
TaskingLock.Lock(__FUNCTION__);
bool MONITORSupported = false;
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0)
{
#if defined(__amd64__)
uint32_t rax, rbx, rcx, rdx;
CPU::x64::cpuid(0x1, &rax, &rbx, &rcx, &rdx);
if (rcx & CPU::x64::CPUID_FEAT_RCX_MONITOR)
CPU::x64::AMD::CPUID0x1 cpuid1amd;
#elif defined(__i386__)
CPU::x32::AMD::CPUID0x1 cpuid1amd;
#endif
#if defined(__amd64__) || defined(__i386__)
asmv("cpuid"
: "=a"(cpuid1amd.EAX.raw), "=b"(cpuid1amd.EBX.raw), "=c"(cpuid1amd.ECX.raw), "=d"(cpuid1amd.EDX.raw)
: "a"(0x1));
#endif
MONITORSupported = cpuid1amd.ECX.MONITOR;
}
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
{
#if defined(__amd64__)
CPU::x64::Intel::CPUID0x1 cpuid1intel;
#elif defined(__i386__)
CPU::x32::Intel::CPUID0x1 cpuid1intel;
#endif
#if defined(__amd64__) || defined(__i386__)
asmv("cpuid"
: "=a"(cpuid1intel.EAX.raw), "=b"(cpuid1intel.EBX.raw), "=c"(cpuid1intel.ECX.raw), "=d"(cpuid1intel.EDX.raw)
: "a"(0x1));
#endif
MONITORSupported = cpuid1intel.ECX.MONITOR;
}
if (MONITORSupported)
{
trace("CPU has MONITOR/MWAIT support.");
}
@ -1182,7 +1209,7 @@ namespace Tasking
error("Interrupts are not enabled.");
CPU::Interrupts(CPU::Enable);
}
#endif
TaskingLock.Unlock();
IdleProcess = CreateProcess(nullptr, (char *)"Idle", TaskTrustLevel::Idle);
for (int i = 0; i < SMP::CPUCores; i++)

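MONITOR/MWAIT, once detected as above, is typically used to park an idle core until a watched cache line is written, instead of spinning or relying on HLT alone. A minimal sketch of that primitive (a hypothetical helper, not code from this commit):

static inline void IdleWaitOn(volatile void *Address)
{
    /* Arm the monitor on the cache line containing Address... */
    asmv("monitor" : : "a"(Address), "c"(0), "d"(0));
    /* ...then sleep until that line is written (or an interrupt arrives). */
    asmv("mwait" : : "a"(0), "c"(0));
}
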
View File

@ -21,7 +21,7 @@ __constructor void TestRandom()
#endif
RDRANDFlag = cpuid1amd.ECX.RDRAND;
}
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
{
#if defined(__amd64__)
CPU::x64::Intel::CPUID0x1 cpuid1intel;

View File

@ -123,6 +123,29 @@ namespace CPU
SIMD_SSSE3,
SIMD_SSE41,
SIMD_SSE42,
SIMD_AVX,
SIMD_AVX2,
SIMD_AVX512F,
SIMD_AVX512BW,
SIMD_AVX512CD,
SIMD_AVX512DQ,
SIMD_AVX512ER,
SIMD_AVX512IFMA,
SIMD_AVX512PF,
SIMD_AVX512VBMI,
SIMD_AVX512VL,
SIMD_AVX512VNNI,
SIMD_AVX512BITALG,
SIMD_AVX512VPOPCNTDQ,
SIMD_AVX512_4VNNIW,
SIMD_AVX512_4FMAPS,
SIMD_AVX512_VP2INTERSECT,
SIMD_AVX512_BF16,
SIMD_AVX512_VBMI2,
SIMD_AVX512_GFNI,
SIMD_AVX512_VAES,
SIMD_AVX512_VPCLMULQDQ,
SIMD_AVX512_VNNI,
};
/**
@ -147,7 +170,7 @@ namespace CPU
char *Hypervisor();
/**
* @brief Check SIMD support.
* @brief Check SIMD support. It will return the highest supported SIMD type.
*
* @return x86SIMDType
*/
@ -227,11 +250,25 @@ namespace CPU
void *PageTable(void *PT = nullptr);
/** @brief To be used only once. */
void InitializeFeatures();
void InitializeFeatures(long Core);
/** @brief Get CPU counter value. */
uintptr_t Counter();
typedef int __v4si __attribute__((__vector_size__(16)));
typedef union
{
__v4si vector;
long long int i64[2];
int i32[4];
short i16[8];
char i8[16];
int __attribute__((__vector_size__(16))) m128i_i32;
short __attribute__((__vector_size__(16))) m128i_i16;
char __attribute__((__vector_size__(16))) m128i_i8;
} __m128i;
namespace MemBar
{
SafeFunction static inline void Barrier()
@ -552,6 +589,32 @@ namespace CPU
uint32_t raw;
} DR7;
struct FXState
{
/** @brief FPU control word */
uint16_t fcw;
/** @brief FPU status word */
uint16_t fsw;
/** @brief FPU tag words */
uint8_t ftw;
/** @brief Reserved (zero) */
uint8_t Reserved;
/** @brief FPU opcode */
uint16_t fop;
/** @brief FPU instruction pointer */
uint64_t rip;
/** @brief FPU data pointer */
uint64_t rdp;
/** @brief SSE control register */
uint32_t mxcsr;
/** @brief SSE control register mask */
uint32_t mxcsrmask;
/** @brief FPU registers (last 6 bytes reserved) */
uint8_t st[8][16];
/** @brief XMM registers */
uint8_t xmm[16][16];
} __attribute__((packed));
/**
* @brief CPUID
*
@ -2125,79 +2188,6 @@ namespace CPU
namespace x64
{
enum CPUIDFeatures
{
CPUID_FEAT_RCX_SSE3 = 1 << 0,
CPUID_FEAT_RCX_PCLMULQDQ = 1 << 1,
CPUID_FEAT_RCX_DTES64 = 1 << 2,
CPUID_FEAT_RCX_MONITOR = 1 << 3,
CPUID_FEAT_RCX_DS_CPL = 1 << 4,
CPUID_FEAT_RCX_VMX = 1 << 5,
CPUID_FEAT_RCX_SMX = 1 << 6,
CPUID_FEAT_RCX_EST = 1 << 7,
CPUID_FEAT_RCX_TM2 = 1 << 8,
CPUID_FEAT_RCX_SSSE3 = 1 << 9,
CPUID_FEAT_RCX_CID = 1 << 10,
CPUID_FEAT_RCX_FMA = 1 << 12,
CPUID_FEAT_RCX_CX16 = 1 << 13,
CPUID_FEAT_RCX_ETPRD = 1 << 14,
CPUID_FEAT_RCX_PDCM = 1 << 15,
CPUID_FEAT_RCX_PCIDE = 1 << 17,
CPUID_FEAT_RCX_DCA = 1 << 18,
CPUID_FEAT_RCX_SSE4_1 = 1 << 19,
CPUID_FEAT_RCX_SSE4_2 = 1 << 20,
CPUID_FEAT_RCX_x2APIC = 1 << 21,
CPUID_FEAT_RCX_MOVBE = 1 << 22,
CPUID_FEAT_RCX_POPCNT = 1 << 23,
CPUID_FEAT_RCX_AES = 1 << 25,
CPUID_FEAT_RCX_XSAVE = 1 << 26,
CPUID_FEAT_RCX_OSXSAVE = 1 << 27,
CPUID_FEAT_RCX_AVX = 1 << 28,
CPUID_FEAT_RCX_F16C = 1 << 29,
CPUID_FEAT_RCX_RDRAND = 1 << 30,
CPUID_FEAT_RDX_FPU = 1 << 0,
CPUID_FEAT_RDX_VME = 1 << 1,
CPUID_FEAT_RDX_DE = 1 << 2,
CPUID_FEAT_RDX_PSE = 1 << 3,
CPUID_FEAT_RDX_TSC = 1 << 4,
CPUID_FEAT_RDX_MSR = 1 << 5,
CPUID_FEAT_RDX_PAE = 1 << 6,
CPUID_FEAT_RDX_MCE = 1 << 7,
CPUID_FEAT_RDX_CX8 = 1 << 8,
CPUID_FEAT_RDX_APIC = 1 << 9,
CPUID_FEAT_RDX_SEP = 1 << 11,
CPUID_FEAT_RDX_MTRR = 1 << 12,
CPUID_FEAT_RDX_PGE = 1 << 13,
CPUID_FEAT_RDX_MCA = 1 << 14,
CPUID_FEAT_RDX_CMOV = 1 << 15,
CPUID_FEAT_RDX_PAT = 1 << 16,
CPUID_FEAT_RDX_PSE36 = 1 << 17,
CPUID_FEAT_RDX_PSN = 1 << 18,
CPUID_FEAT_RDX_CLF = 1 << 19,
CPUID_FEAT_RDX_DTES = 1 << 21,
CPUID_FEAT_RDX_ACPI = 1 << 22,
CPUID_FEAT_RDX_MMX = 1 << 23,
CPUID_FEAT_RDX_FXSR = 1 << 24,
CPUID_FEAT_RDX_SSE = 1 << 25,
CPUID_FEAT_RDX_SSE2 = 1 << 26,
CPUID_FEAT_RDX_SS = 1 << 27,
CPUID_FEAT_RDX_HTT = 1 << 28,
CPUID_FEAT_RDX_TM1 = 1 << 29,
CPUID_FEAT_RDX_IA64 = 1 << 30,
CPUID_FEAT_RDX_PBE = 1 << 31,
// ? Not sure how to get it.
CPUID_FEAT_RDX_SMEP = 1 << 7,
CPUID_FEAT_RDX_UMIP = 1 << 2,
CPUID_FEAT_RDX_SYSCALL = 1 << 11,
CPUID_FEAT_XD = 1 << 20,
CPUID_FEAT_1GB_PAGE = 1 << 26,
CPUID_FEAT_RDTSCP = 1 << 27,
CPUID_FEAT_LONG_MODE = 1 << 29,
CPUID_FEAT_RDX_SMAP = (1 << 20)
};
enum MSRID
{
MSR_MONITOR_FILTER_SIZE = 0x6,
@ -3110,6 +3100,41 @@ namespace CPU
uint64_t raw;
} CR8;
typedef union XCR0
{
/*
https://wiki.osdev.org/CPU_Registers_x86#XCR0 says that the PKRU bit is 9?
*/
struct
{
/** @brief X87 FPU/MMX/SSE Support (must be 1) */
uint64_t X87 : 1;
/** @brief XSAVE support for MXCSR and XMM registers */
uint64_t SSE : 1;
/** @brief AVX support for YMM registers */
uint64_t AVX : 1;
/** @brief MPX support for BND registers */
uint64_t BNDREG : 1;
/** @brief MPX support for BNDCFGU and BNDSTATUS registers */
uint64_t BNDCSR : 1;
/** @brief AVX-512 support for opmask registers */
uint64_t OpMask : 1;
/** @brief AVX-512 enabled and XSAVE support for upper halves of lower ZMM registers */
uint64_t ZMM_HI256 : 1;
/** @brief AVX-512 enabled and XSAVE support for upper ZMM registers */
uint64_t HI16_ZMM : 1;
/** @brief XSAVE support for PKRU register */
uint64_t PKRU : 1;
/** @brief Reserved */
uint64_t Reserved0 : 53;
/** @brief AMD lightweight profiling */
uint64_t LWP : 1;
/** @brief Reserved */
uint64_t Reserved1 : 1;
};
uint64_t raw;
} XCR0;
typedef union EFER
{
struct
@ -3233,6 +3258,32 @@ namespace CPU
uint64_t raw;
} SelectorErrorCode;
struct FXState
{
/** @brief FPU control word */
uint16_t fcw;
/** @brief FPU status word */
uint16_t fsw;
/** @brief FPU tag words */
uint8_t ftw;
/** @brief Reserved (zero) */
uint8_t Reserved;
/** @brief FPU opcode */
uint16_t fop;
/** @brief FPU instruction pointer */
uint64_t rip;
/** @brief FPU data pointer */
uint64_t rdp;
/** @brief SSE control register */
uint32_t mxcsr;
/** @brief SSE control register mask */
uint32_t mxcsrmask;
/** @brief FPU registers (last 6 bytes reserved) */
uint8_t st[8][16];
/** @brief XMM registers */
uint8_t xmm[16][16];
} __attribute__((packed));
SafeFunction static inline void lgdt(void *gdt)
{
#if defined(__amd64__)
@ -3375,6 +3426,18 @@ namespace CPU
return (CR8){.raw = Result};
}
SafeFunction static inline XCR0 readxcr0()
{
uint64_t Result = 0;
#if defined(__amd64__)
asmv("xgetbv"
: "=a"(Result)
: "c"(0)
: "edx");
#endif
return (XCR0){.raw = Result};
}
SafeFunction static inline void writecr0(CR0 ControlRegister)
{
#if defined(__amd64__)
@ -3425,6 +3488,16 @@ namespace CPU
#endif
}
SafeFunction static inline void writexcr0(XCR0 ControlRegister)
{
#if defined(__amd64__)
asmv("xsetbv"
:
: "a"(ControlRegister.raw), "c"(0)
: "edx");
#endif
}
SafeFunction static inline void fxsave(void *FXSaveArea)
{
#if defined(__amd64__)
@ -3836,6 +3909,320 @@ namespace CPU
} EDX;
};
/** @brief Extended feature flags enumeration */
struct CPUID0x7_0
{
union
{
struct
{
uint64_t Reserved : 32;
};
uint64_t raw;
} EAX;
union
{
struct
{
/** @brief Access to base of fs and gs */
uint64_t FSGSBase : 1;
/** @brief IA32_TSC_ADJUST MSR */
uint64_t IA32TSCAdjust : 1;
/** @brief Software Guard Extensions */
uint64_t SGX : 1;
/** @brief Bit Manipulation Instruction Set 1 */
uint64_t BMI1 : 1;
/** @brief TSX Hardware Lock Elision */
uint64_t HLE : 1;
/** @brief Advanced Vector Extensions 2 */
uint64_t AVX2 : 1;
/** @brief FDP_EXCPTN_ONLY */
uint64_t FDPExcptonOnly : 1;
/** @brief Supervisor Mode Execution Protection */
uint64_t SMEP : 1;
/** @brief Bit Manipulation Instruction Set 2 */
uint64_t BMI2 : 1;
/** @brief Enhanced REP MOVSB/STOSB */
uint64_t ERMS : 1;
/** @brief INVPCID */
uint64_t INVPCID : 1;
/** @brief RTM */
uint64_t RTM : 1;
/** @brief Intel Resource Director Monitoring */
uint64_t RDT_M : 1;
/** @brief Deprecates FPU CS and DS values */
uint64_t DeprecatesFPU : 1;
/** @brief Intel Memory Protection Extensions */
uint64_t MPX : 1;
/** @brief Intel Resource Director Allocation */
uint64_t RDT_A : 1;
/** @brief AVX-512 Foundation */
uint64_t AVX512F : 1;
/** @brief AVX-512 Doubleword and Quadword Instructions */
uint64_t AVX512DQ : 1;
/** @brief RDSEED */
uint64_t RDSEED : 1;
/** @brief Intel Multi-Precision Add-Carry Instruction Extensions */
uint64_t ADX : 1;
/** @brief Supervisor Mode Access Prevention */
uint64_t SMAP : 1;
/** @brief AVX-512 Integer Fused Multiply-Add Instructions */
uint64_t AVX512IFMA : 1;
/** @brief Reserved */
uint64_t Reserved : 1;
/** @brief CLFLUSHOPT */
uint64_t CLFLUSHOPT : 1;
/** @brief CLWB */
uint64_t CLWB : 1;
/** @brief Intel Processor Trace */
uint64_t IntelProcessorTrace : 1;
/** @brief AVX-512 Prefetch Instructions */
uint64_t AVX512PF : 1;
/** @brief AVX-512 Exponential and Reciprocal Instructions */
uint64_t AVX512ER : 1;
/** @brief AVX-512 Conflict Detection Instructions */
uint64_t AVX512CD : 1;
/** @brief SHA Extensions */
uint64_t SHA : 1;
/** @brief AVX-512 Byte and Word Instructions */
uint64_t AVX512BW : 1;
/** @brief AVX-512 Vector Length Extensions */
uint64_t AVX512VL : 1;
};
uint64_t raw;
} EBX;
union
{
struct
{
/** @brief PREFETCHWT1 */
uint64_t PREFETCHWT1 : 1;
/** @brief AVX-512 Vector Bit Manipulation Instructions */
uint64_t AVX512VBMI : 1;
/** @brief User Mode Instruction Prevention */
uint64_t UMIP : 1;
/** @brief Memory Protection Keys for User-mode pages */
uint64_t PKU : 1;
/** @brief PKU enabled by OS */
uint64_t OSPKE : 1;
/** @brief Timed pause and user-level monitor/wait */
uint64_t WaitPKG : 1;
/** @brief AVX-512 Vector Bit Manipulation Instructions 2 */
uint64_t AVX512VBMI2 : 1;
/** @brief Control flow enforcement (CET) shadow stack */
uint64_t CET_SS : 1;
/** @brief Galois Field instructions */
uint64_t GFNI : 1;
/** @brief Vector AES instruction set (VEX-256/EVEX) */
uint64_t VAES : 1;
/** @brief CLMUL instruction set (VEX-256/EVEX) */
uint64_t VPCLMULQDQ : 1;
/** @brief AVX-512 Vector Neural Network Instructions */
uint64_t AVX512VNNI : 1;
/** @brief AVX-512 Bit Algorithms Instructions */
uint64_t AVX512BITALG : 1;
/** @brief IA32_TME related MSRs */
uint64_t TME : 1;
/** @brief AVX-512 Vector Population Count Double and Quad-word */
uint64_t AVX512VPOPCNTDQ : 1;
/** @brief Reserved */
uint64_t Reserved0 : 1;
/** @brief 5-level paging (57 address bits) */
uint64_t LA57 : 1;
/** @brief The value of userspace MPX Address-Width Adjust used by the BNDLDX and BNDSTX Intel MPX instructions in 64-bit mode */
uint64_t MAWAU : 5;
/** @brief Read Processor ID and IA32_TSC_AUX */
uint64_t RDPID : 1;
/** @brief Key Locker */
uint64_t KL : 1;
/** @brief BUS_LOCK_DETECT */
uint64_t BusLockDetect : 1;
/** @brief Cache line demote */
uint64_t CLDEMOTE : 1;
/** @brief Reserved */
uint64_t Reserved1 : 1;
/** @brief MOVDIRI */
uint64_t MOVDIRI : 1;
/** @brief MOVDIR64B */
uint64_t MOVDIR64B : 1;
/** @brief SGX Launch Configuration */
uint64_t SGX_LC : 1;
/** @brief Protection Keys for Supervisor-mode pages */
uint64_t PKS : 1;
};
uint64_t raw;
} ECX;
union
{
struct
{
/** @brief Reserved */
uint64_t Reserved0 : 2;
/** @brief AVX-512 4-register Neural Network Instructions */
uint64_t AVX512_4VNNIW : 1;
/** @brief AVX-512 4-register Multiply Accumulation Single Precision */
uint64_t AVX512_4FMAPS : 1;
/** @brief Fast Short REP MOVSB/STOSB */
uint64_t FSRM : 1;
/** @brief User Inter-Processor Interrupts */
uint64_t UINTR : 1;
/** @brief Reserved */
uint64_t Reserved1 : 2;
/** @brief AVX-512 VP2INTERSECT Doubleword and Quadword Instructions */
uint64_t AVX512_VP2INTERSECT : 1;
/** @brief Special Register Buffer Data Sampling Mitigations */
uint64_t SRBDS_CTRL : 1;
/** @brief VERW instruction clears CPU buffers */
uint64_t MC_CLEAR : 1;
/** @brief All TSX transactions are aborted */
uint64_t TSX_FORCE_ABORT : 1;
/** @brief Reserved */
uint64_t Reserved2 : 1;
/** @brief TSX_FORCE_ABORT MSR is available */
uint64_t TsxForceAbortMsr : 1;
/** @brief SERIALIZE */
uint64_t SERIALIZE : 1;
/** @brief Mixture of CPU types in processor topology */
uint64_t HYBRID : 1;
/** @brief TSXLDTRK */
uint64_t TSXLDTRK : 1;
/** @brief Reserved */
uint64_t Reserved3 : 1;
/** @brief Platform configuration for Memory Encryption Technologies Instructions */
uint64_t PCONFIG : 1;
/** @brief Architectural Last Branch Records */
uint64_t LBR : 1;
/** @brief Control flow enforcement (CET) indirect branch tracking */
uint64_t CET_IBT : 1;
/** @brief Reserved */
uint64_t Reserved4 : 1;
/** @brief Tile computation on bfloat16 numbers */
uint64_t AMX_BF16 : 1;
/** @brief AVX512-FP16 half-precision floating-point instructions */
uint64_t AVX512_FP16 : 1;
/** @brief Tile architecture */
uint64_t AMX_TILE : 1;
/** @brief Tile computation on 8-bit integers */
uint64_t AMX_INT8 : 1;
/** @brief Speculation Control, part of Indirect Branch Control (IBC):
Indirect Branch Restricted Speculation (IBRS) and
Indirect Branch Prediction Barrier (IBPB) */
uint64_t SPEC_CTRL : 1;
/** @brief Single Thread Indirect Branch Predictor, part of IBC */
uint64_t STIBP : 1;
/** @brief IA32_FLUSH_CMD MSR */
uint64_t L1D_FLUSH : 1;
/** @brief IA32_ARCH_CAPABILITIES (lists speculative side channel mitigations) */
uint64_t ArchCapabilities : 1;
/** @brief IA32_CORE_CAPABILITIES MSR (lists model-specific core capabilities) */
uint64_t CoreCapabilities : 1;
/** @brief Speculative Store Bypass Disable, as mitigation for Speculative Store Bypass (IA32_SPEC_CTRL) */
uint64_t SSBD : 1;
};
uint64_t raw;
} EDX;
};
/** @brief Extended feature flags enumeration */
struct CPUID0x7_1
{
union
{
struct
{
uint64_t Reserved0 : 3;
/** @brief RAO-INT */
uint64_t RAO_INT : 1;
/** @brief AVX Vector Neural Network Instructions (XNNI) (VEX encoded) */
uint64_t AVX_VNNI : 1;
/** @brief AVX-512 instructions for bfloat16 numbers */
uint64_t AVX512_BF16 : 1;
/** @brief Reserved */
uint64_t Reserved1 : 1;
/** @brief CMPccXADD */
uint64_t CMPCCXADD : 1;
/** @brief Architectural Performance Monitoring Extended Leaf (EAX=23h) */
uint64_t ARCHPERFMONEXT : 1;
/** @brief Reserved */
uint64_t Reserved2 : 1;
/** @brief Fast zero-length MOVSB */
uint64_t FAST_ZERO_REP_MOVSB : 1;
/** @brief Fast zero-length STOSB */
uint64_t FAST_SHORT_REP_STOSB : 1;
/** @brief Fast zero-length CMPSB and SCASB */
uint64_t FAST_SHORT_REP_CMPSB_SCASB : 1;
/** @brief Reserved */
uint64_t Reserved3 : 4;
/** @brief Flexible Return and Event Delivery */
uint64_t FRED : 1;
/** @brief LKGS Instruction */
uint64_t LKGS : 1;
/** @brief WRMSRNS instruction */
uint64_t WRMSRNS : 1;
/** @brief Reserved */
uint64_t Reserved4 : 1;
/** @brief AMX instructions for FP16 numbers */
uint64_t AMX_FP16 : 1;
/** @brief HRESET instruction, IA32_HRESET_ENABLE MSR, and Processor History Reset Leaf (EAX=20h) */
uint64_t HRESET : 1;
/** @brief AVX IFMA instructions */
uint64_t AVX_IFMA : 1;
/** @brief Reserved */
uint64_t Reserved5 : 2;
/** @brief Linear Address Masking */
uint64_t LAM : 1;
/** @brief RDMSRLIST and WRMSRLIST instructions, and the IA32_BARRIER MSR */
uint64_t MSRLIST : 1;
};
uint64_t raw;
} EAX;
union
{
struct
{
/** @brief IA32_PPIN and IA32_PPIN_CTL MSRs */
uint64_t PPIN : 1;
/** @brief Reserved */
uint64_t Reserved : 31;
};
uint64_t raw;
} EBX;
union
{
struct
{
/** @brief Reserved */
uint64_t Reserved : 32;
};
uint64_t raw;
} ECX;
union
{
struct
{
/** @brief Reserved */
uint64_t Reserved0 : 4;
/** @brief AVX VNNI INT8 instructions */
uint64_t AVX_VNNI_INT8 : 1;
/** @brief AVX NE CONVERT instructions */
uint64_t AVX_NE_CONVERT : 1;
/** @brief Reserved */
uint64_t Reserved1 : 8;
/** @brief PREFETCHIT0 and PREFETCHIT1 instructions */
uint64_t PREFETCHIT : 1;
/** @brief Reserved */
uint64_t Reserved2 : 17;
};
uint64_t raw;
} EDX;
};
/** @brief Performance monitors */
struct CPUID0xA
{
@ -4661,8 +5048,9 @@ namespace CPU
{
struct
{
uint64_t BranchID : 16;
uint64_t Reserved0 : 16;
uint64_t BrandId : 16;
uint64_t Reserved0 : 12;
uint64_t PkgType : 4;
};
uint64_t raw;
} EBX;
@ -4674,9 +5062,28 @@ namespace CPU
uint64_t LAHF_SAHF : 1;
uint64_t CmpLegacy : 1;
uint64_t SVM : 1;
uint64_t Reserved0 : 1;
uint64_t ExtApicSpace : 1;
uint64_t AltMovCr8 : 1;
uint64_t Reserved1 : 26;
uint64_t ABM : 1;
uint64_t SSE4A : 1;
uint64_t MisalignedSSE : 1;
uint64_t ThreeDNowPrefetch : 1;
uint64_t OSVW : 1;
uint64_t IBS : 1;
uint64_t XOP : 1;
uint64_t SKINIT : 1;
uint64_t WDT : 1;
uint64_t Reserved0 : 1;
uint64_t LWP : 1;
uint64_t FMA4 : 1;
uint64_t Reserved1 : 1;
uint64_t Reserved2 : 1;
uint64_t NodeID : 1;
uint64_t Reserved3 : 1;
uint64_t TBM : 1;
uint64_t TopologyExtensions : 1;
uint64_t Reserved4 : 9;
};
uint64_t raw;
} ECX;
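
The readxcr0/writexcr0 helpers above only take effect after the OS sets CR4.OSXSAVE. A hedged sketch of using them to let XSAVE manage x87/SSE/AVX state (assuming the CR4 union in this header exposes an OSXSAVE bit and a matching writecr4 helper exists):

x64::CR4 cr4 = x64::readcr4();
cr4.OSXSAVE = 1;                 /* expose XGETBV/XSETBV and the XSAVE family */
x64::writecr4(cr4);

x64::XCR0 xcr0 = x64::readxcr0();
xcr0.X87 = 1;                    /* must always be set */
xcr0.SSE = 1;                    /* XMM registers and MXCSR */
xcr0.AVX = 1;                    /* YMM state, only if CPUID reports AVX */
x64::writexcr0(xcr0);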

View File

@ -11,7 +11,7 @@
struct CPUArchData
{
#if defined(__amd64__)
int stub;
CPU::x64::FXState *FPU;
/* TODO */
#elif defined(__i386__)
#elif defined(__aarch64__)
@ -39,7 +39,7 @@ struct CPUData
Tasking::TCB *CurrentThread;
/** @brief Architecture-specific data. */
CPUArchData *Data;
CPUArchData Data;
/** @brief Checksum. Used to verify the integrity of the data. Must be equal to CPU_DATA_CHECKSUM (0xC0FFEE). */
int Checksum;
} __attribute__((packed));
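
With CPUArchData now carrying a per-core FXState pointer, the context-switch path can save and restore x87/SSE state through the fxsave/fxrstor helpers in cpu.hpp. A minimal sketch (the thread variables here are hypothetical; the 512-byte FXState area must be 16-byte aligned, which the page-sized allocations in this commit already guarantee):

/* Capture the outgoing thread's x87/MMX/SSE registers. */
CPU::x64::fxsave(CurrentThread->FPU);
/* Reload the incoming thread's previously saved state. */
CPU::x64::fxrstor(NextThread->FPU);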

View File

@ -57,32 +57,6 @@ namespace Tasking
Terminated
};
struct FXState
{
/** @brief FPU control word */
uint16_t fcw;
/** @brief FPU status word */
uint16_t fsw;
/** @brief FPU tag words */
uint8_t ftw;
/** @brief Reserved (zero) */
uint8_t Reserved;
/** @brief FPU opcode */
uint16_t fop;
/** @brief FPU instruction pointer */
uint64_t rip;
/** @brief FPU data pointer */
uint64_t rdp;
/** @brief SSE control register */
uint32_t mxcsr;
/** @brief SSE control register mask */
uint32_t mxcsrmask;
/** @brief FPU registers (last 6 bytes reserved) */
uint8_t st[8][16];
/** @brief XMM registers */
uint8_t xmm[16][16];
} __attribute__((packed));
struct TaskSecurity
{
TaskTrustLevel TrustLevel;
@ -130,7 +104,7 @@ namespace Tasking
uintptr_t IPHistory[128];
TaskSecurity Security;
TaskInfo Info;
FXState *FPU;
CPU::x64::FXState *FPU;
void Rename(const char *name)
{