mirror of
https://github.com/Fennix-Project/Kernel.git
synced 2025-05-28 15:34:33 +00:00
Kernel now uses SSE instructions properly
This commit is contained in:
parent
450fe4f0ac
commit
4501826025
@ -237,13 +237,42 @@ namespace APIC
|
|||||||
this->APICBaseAddress = BaseLow << 12u | BaseHigh << 32u;
|
this->APICBaseAddress = BaseLow << 12u | BaseHigh << 32u;
|
||||||
trace("APIC Address: %#lx", this->APICBaseAddress);
|
trace("APIC Address: %#lx", this->APICBaseAddress);
|
||||||
|
|
||||||
uint32_t rcx;
|
bool x2APICSupported = false;
|
||||||
cpuid(1, 0, 0, &rcx, 0);
|
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0)
|
||||||
if (rcx & CPUID_FEAT_RCX_x2APIC)
|
|
||||||
{
|
{
|
||||||
// this->x2APICSupported = true;
|
#if defined(__amd64__)
|
||||||
warn("x2APIC not supported yet.");
|
CPU::x64::AMD::CPUID0x1 cpuid1amd;
|
||||||
// wrmsr(MSR_APIC_BASE, (rdmsr(MSR_APIC_BASE) | (1 << 11)) & ~(1 << 10));
|
#elif defined(__i386__)
|
||||||
|
CPU::x32::AMD::CPUID0x1 cpuid1amd;
|
||||||
|
#endif
|
||||||
|
#if defined(__amd64__) || defined(__i386__)
|
||||||
|
asmv("cpuid"
|
||||||
|
: "=a"(cpuid1amd.EAX.raw), "=b"(cpuid1amd.EBX.raw), "=c"(cpuid1amd.ECX.raw), "=d"(cpuid1amd.EDX.raw)
|
||||||
|
: "a"(0x1));
|
||||||
|
#endif
|
||||||
|
// FIXME: Not sure if I configured this correctly or something else is wrong
|
||||||
|
// x2APICSupported = cpuid1amd.ECX.x2APIC;
|
||||||
|
fixme("AMD does even support x2APIC? ECX->Reserved10: %#lx", cpuid1amd.ECX.Reserved10);
|
||||||
|
}
|
||||||
|
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
|
||||||
|
{
|
||||||
|
#if defined(__amd64__)
|
||||||
|
CPU::x64::Intel::CPUID0x1 cpuid1intel;
|
||||||
|
#elif defined(__i386__)
|
||||||
|
CPU::x32::Intel::CPUID0x1 cpuid1intel;
|
||||||
|
#endif
|
||||||
|
#if defined(__amd64__) || defined(__i386__)
|
||||||
|
asmv("cpuid"
|
||||||
|
: "=a"(cpuid1intel.EAX.raw), "=b"(cpuid1intel.EBX.raw), "=c"(cpuid1intel.ECX.raw), "=d"(cpuid1intel.EDX.raw)
|
||||||
|
: "a"(0x1));
|
||||||
|
#endif
|
||||||
|
x2APICSupported = cpuid1intel.ECX.x2APIC;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (x2APICSupported)
|
||||||
|
{
|
||||||
|
this->x2APICSupported = true;
|
||||||
|
wrmsr(MSR_APIC_BASE, (rdmsr(MSR_APIC_BASE) | (1 << 11)) & ~(1 << 10));
|
||||||
BaseStruct.EN = 1;
|
BaseStruct.EN = 1;
|
||||||
wrmsr(MSR_APIC_BASE, BaseStruct.raw);
|
wrmsr(MSR_APIC_BASE, BaseStruct.raw);
|
||||||
}
|
}
|
||||||
|
@ -53,8 +53,8 @@ CPUData *GetCurrentCPU()
|
|||||||
extern "C" void StartCPU()
|
extern "C" void StartCPU()
|
||||||
{
|
{
|
||||||
CPU::Interrupts(CPU::Disable);
|
CPU::Interrupts(CPU::Disable);
|
||||||
CPU::InitializeFeatures();
|
|
||||||
uint64_t CoreID = (int)*reinterpret_cast<int *>(CORE);
|
uint64_t CoreID = (int)*reinterpret_cast<int *>(CORE);
|
||||||
|
CPU::InitializeFeatures(CoreID);
|
||||||
// Initialize GDT and IDT
|
// Initialize GDT and IDT
|
||||||
Interrupts::Initialize(CoreID);
|
Interrupts::Initialize(CoreID);
|
||||||
Interrupts::Enable(CoreID);
|
Interrupts::Enable(CoreID);
|
||||||
|
263
Core/CPU.cpp
263
Core/CPU.cpp
@ -3,26 +3,29 @@
|
|||||||
#include <memory.hpp>
|
#include <memory.hpp>
|
||||||
#include <convert.h>
|
#include <convert.h>
|
||||||
#include <debug.h>
|
#include <debug.h>
|
||||||
|
#include <smp.hpp>
|
||||||
|
|
||||||
#include "../kernel.h"
|
#include "../kernel.h"
|
||||||
|
|
||||||
namespace CPU
|
namespace CPU
|
||||||
{
|
{
|
||||||
|
static bool SSEEnabled = false;
|
||||||
|
|
||||||
char *Vendor()
|
char *Vendor()
|
||||||
{
|
{
|
||||||
static char Vendor[13];
|
static char Vendor[13];
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
uint32_t rax, rbx, rcx, rdx;
|
uint32_t eax, ebx, ecx, edx;
|
||||||
x64::cpuid(0x0, &rax, &rbx, &rcx, &rdx);
|
x64::cpuid(0x0, &eax, &ebx, &ecx, &edx);
|
||||||
memcpy(Vendor + 0, &rbx, 4);
|
memcpy_unsafe(Vendor + 0, &ebx, 4);
|
||||||
memcpy(Vendor + 4, &rdx, 4);
|
memcpy_unsafe(Vendor + 4, &edx, 4);
|
||||||
memcpy(Vendor + 8, &rcx, 4);
|
memcpy_unsafe(Vendor + 8, &ecx, 4);
|
||||||
#elif defined(__i386__)
|
#elif defined(__i386__)
|
||||||
uint32_t rax, rbx, rcx, rdx;
|
uint32_t eax, ebx, ecx, edx;
|
||||||
x32::cpuid(0x0, &rax, &rbx, &rcx, &rdx);
|
x32::cpuid(0x0, &eax, &ebx, &ecx, &edx);
|
||||||
memcpy(Vendor + 0, &rbx, 4);
|
memcpy_unsafe(Vendor + 0, &ebx, 4);
|
||||||
memcpy(Vendor + 4, &rdx, 4);
|
memcpy_unsafe(Vendor + 4, &edx, 4);
|
||||||
memcpy(Vendor + 8, &rcx, 4);
|
memcpy_unsafe(Vendor + 8, &ecx, 4);
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
asmv("mrs %0, MIDR_EL1"
|
asmv("mrs %0, MIDR_EL1"
|
||||||
: "=r"(Vendor[0]));
|
: "=r"(Vendor[0]));
|
||||||
@ -34,39 +37,39 @@ namespace CPU
|
|||||||
{
|
{
|
||||||
static char Name[49];
|
static char Name[49];
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
uint32_t rax, rbx, rcx, rdx;
|
uint32_t eax, ebx, ecx, edx;
|
||||||
x64::cpuid(0x80000002, &rax, &rbx, &rcx, &rdx);
|
x64::cpuid(0x80000002, &eax, &ebx, &ecx, &edx);
|
||||||
memcpy(Name + 0, &rax, 4);
|
memcpy_unsafe(Name + 0, &eax, 4);
|
||||||
memcpy(Name + 4, &rbx, 4);
|
memcpy_unsafe(Name + 4, &ebx, 4);
|
||||||
memcpy(Name + 8, &rcx, 4);
|
memcpy_unsafe(Name + 8, &ecx, 4);
|
||||||
memcpy(Name + 12, &rdx, 4);
|
memcpy_unsafe(Name + 12, &edx, 4);
|
||||||
x64::cpuid(0x80000003, &rax, &rbx, &rcx, &rdx);
|
x64::cpuid(0x80000003, &eax, &ebx, &ecx, &edx);
|
||||||
memcpy(Name + 16, &rax, 4);
|
memcpy_unsafe(Name + 16, &eax, 4);
|
||||||
memcpy(Name + 20, &rbx, 4);
|
memcpy_unsafe(Name + 20, &ebx, 4);
|
||||||
memcpy(Name + 24, &rcx, 4);
|
memcpy_unsafe(Name + 24, &ecx, 4);
|
||||||
memcpy(Name + 28, &rdx, 4);
|
memcpy_unsafe(Name + 28, &edx, 4);
|
||||||
x64::cpuid(0x80000004, &rax, &rbx, &rcx, &rdx);
|
x64::cpuid(0x80000004, &eax, &ebx, &ecx, &edx);
|
||||||
memcpy(Name + 32, &rax, 4);
|
memcpy_unsafe(Name + 32, &eax, 4);
|
||||||
memcpy(Name + 36, &rbx, 4);
|
memcpy_unsafe(Name + 36, &ebx, 4);
|
||||||
memcpy(Name + 40, &rcx, 4);
|
memcpy_unsafe(Name + 40, &ecx, 4);
|
||||||
memcpy(Name + 44, &rdx, 4);
|
memcpy_unsafe(Name + 44, &edx, 4);
|
||||||
#elif defined(__i386__)
|
#elif defined(__i386__)
|
||||||
uint32_t rax, rbx, rcx, rdx;
|
uint32_t eax, ebx, ecx, edx;
|
||||||
x32::cpuid(0x80000002, &rax, &rbx, &rcx, &rdx);
|
x32::cpuid(0x80000002, &eax, &ebx, &ecx, &edx);
|
||||||
memcpy(Name + 0, &rax, 4);
|
memcpy_unsafe(Name + 0, &eax, 4);
|
||||||
memcpy(Name + 4, &rbx, 4);
|
memcpy_unsafe(Name + 4, &ebx, 4);
|
||||||
memcpy(Name + 8, &rcx, 4);
|
memcpy_unsafe(Name + 8, &ecx, 4);
|
||||||
memcpy(Name + 12, &rdx, 4);
|
memcpy_unsafe(Name + 12, &edx, 4);
|
||||||
x32::cpuid(0x80000003, &rax, &rbx, &rcx, &rdx);
|
x32::cpuid(0x80000003, &eax, &ebx, &ecx, &edx);
|
||||||
memcpy(Name + 16, &rax, 4);
|
memcpy_unsafe(Name + 16, &eax, 4);
|
||||||
memcpy(Name + 20, &rbx, 4);
|
memcpy_unsafe(Name + 20, &ebx, 4);
|
||||||
memcpy(Name + 24, &rcx, 4);
|
memcpy_unsafe(Name + 24, &ecx, 4);
|
||||||
memcpy(Name + 28, &rdx, 4);
|
memcpy_unsafe(Name + 28, &edx, 4);
|
||||||
x32::cpuid(0x80000004, &rax, &rbx, &rcx, &rdx);
|
x32::cpuid(0x80000004, &eax, &ebx, &ecx, &edx);
|
||||||
memcpy(Name + 32, &rax, 4);
|
memcpy_unsafe(Name + 32, &eax, 4);
|
||||||
memcpy(Name + 36, &rbx, 4);
|
memcpy_unsafe(Name + 36, &ebx, 4);
|
||||||
memcpy(Name + 40, &rcx, 4);
|
memcpy_unsafe(Name + 40, &ecx, 4);
|
||||||
memcpy(Name + 44, &rdx, 4);
|
memcpy_unsafe(Name + 44, &edx, 4);
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
asmv("mrs %0, MIDR_EL1"
|
asmv("mrs %0, MIDR_EL1"
|
||||||
: "=r"(Name[0]));
|
: "=r"(Name[0]));
|
||||||
@ -78,17 +81,17 @@ namespace CPU
|
|||||||
{
|
{
|
||||||
static char Hypervisor[13];
|
static char Hypervisor[13];
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
uint32_t rax, rbx, rcx, rdx;
|
uint32_t eax, ebx, ecx, edx;
|
||||||
x64::cpuid(0x40000000, &rax, &rbx, &rcx, &rdx);
|
x64::cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
|
||||||
memcpy(Hypervisor + 0, &rbx, 4);
|
memcpy_unsafe(Hypervisor + 0, &ebx, 4);
|
||||||
memcpy(Hypervisor + 4, &rcx, 4);
|
memcpy_unsafe(Hypervisor + 4, &ecx, 4);
|
||||||
memcpy(Hypervisor + 8, &rdx, 4);
|
memcpy_unsafe(Hypervisor + 8, &edx, 4);
|
||||||
#elif defined(__i386__)
|
#elif defined(__i386__)
|
||||||
uint32_t rax, rbx, rcx, rdx;
|
uint32_t eax, ebx, ecx, edx;
|
||||||
x64::cpuid(0x40000000, &rax, &rbx, &rcx, &rdx);
|
x64::cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
|
||||||
memcpy(Hypervisor + 0, &rbx, 4);
|
memcpy_unsafe(Hypervisor + 0, &ebx, 4);
|
||||||
memcpy(Hypervisor + 4, &rcx, 4);
|
memcpy_unsafe(Hypervisor + 4, &ecx, 4);
|
||||||
memcpy(Hypervisor + 8, &rdx, 4);
|
memcpy_unsafe(Hypervisor + 8, &edx, 4);
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
asmv("mrs %0, MIDR_EL1"
|
asmv("mrs %0, MIDR_EL1"
|
||||||
: "=r"(Hypervisor[0]));
|
: "=r"(Hypervisor[0]));
|
||||||
@ -171,15 +174,51 @@ namespace CPU
|
|||||||
return PT;
|
return PT;
|
||||||
}
|
}
|
||||||
|
|
||||||
void InitializeFeatures()
|
void InitializeFeatures(long Core)
|
||||||
{
|
{
|
||||||
|
bool PGESupport = false;
|
||||||
|
bool SSESupport = false;
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
static int BSP = 0;
|
static int BSP = 0;
|
||||||
x64::CR0 cr0 = x64::readcr0();
|
x64::CR0 cr0 = x64::readcr0();
|
||||||
x64::CR4 cr4 = x64::readcr4();
|
x64::CR4 cr4 = x64::readcr4();
|
||||||
uint32_t rax, rbx, rcx, rdx;
|
|
||||||
x64::cpuid(0x1, &rax, &rbx, &rcx, &rdx);
|
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0)
|
||||||
if (rdx & x64::CPUID_FEAT_RDX_PGE)
|
{
|
||||||
|
#if defined(__amd64__)
|
||||||
|
CPU::x64::AMD::CPUID0x1 cpuid1amd;
|
||||||
|
#elif defined(__i386__)
|
||||||
|
CPU::x32::AMD::CPUID0x1 cpuid1amd;
|
||||||
|
#endif
|
||||||
|
#if defined(__amd64__) || defined(__i386__)
|
||||||
|
asmv("cpuid"
|
||||||
|
: "=a"(cpuid1amd.EAX.raw), "=b"(cpuid1amd.EBX.raw), "=c"(cpuid1amd.ECX.raw), "=d"(cpuid1amd.EDX.raw)
|
||||||
|
: "a"(0x1));
|
||||||
|
#endif
|
||||||
|
if (cpuid1amd.EDX.PGE)
|
||||||
|
PGESupport = true;
|
||||||
|
if (cpuid1amd.EDX.SSE)
|
||||||
|
SSESupport = true;
|
||||||
|
}
|
||||||
|
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
|
||||||
|
{
|
||||||
|
#if defined(__amd64__)
|
||||||
|
CPU::x64::Intel::CPUID0x1 cpuid1intel;
|
||||||
|
#elif defined(__i386__)
|
||||||
|
CPU::x32::Intel::CPUID0x1 cpuid1intel;
|
||||||
|
#endif
|
||||||
|
#if defined(__amd64__) || defined(__i386__)
|
||||||
|
asmv("cpuid"
|
||||||
|
: "=a"(cpuid1intel.EAX.raw), "=b"(cpuid1intel.EBX.raw), "=c"(cpuid1intel.ECX.raw), "=d"(cpuid1intel.EDX.raw)
|
||||||
|
: "a"(0x1));
|
||||||
|
#endif
|
||||||
|
if (cpuid1intel.EDX.PGE)
|
||||||
|
PGESupport = true;
|
||||||
|
if (cpuid1intel.EDX.SSE)
|
||||||
|
SSESupport = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PGESupport)
|
||||||
{
|
{
|
||||||
debug("Enabling global pages support...");
|
debug("Enabling global pages support...");
|
||||||
if (!BSP)
|
if (!BSP)
|
||||||
@ -187,16 +226,29 @@ namespace CPU
|
|||||||
cr4.PGE = 1;
|
cr4.PGE = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rdx & x64::CPUID_FEAT_RDX_SSE)
|
bool SSEEnableAfter = false;
|
||||||
{
|
|
||||||
debug("Enabling SSE support...");
|
if (strcmp(CPU::Hypervisor(), x86_CPUID_VENDOR_TCG) != 0) /* Not sure if my code is not working properly or something else is the issue. */
|
||||||
if (!BSP)
|
if (SSESupport)
|
||||||
KPrint("SSE is supported.");
|
{
|
||||||
cr0.EM = 0;
|
debug("Enabling SSE support...");
|
||||||
cr0.MP = 1;
|
if (!BSP)
|
||||||
cr4.OSFXSR = 1;
|
KPrint("SSE is supported.");
|
||||||
cr4.OSXMMEXCPT = 1;
|
cr0.EM = 0;
|
||||||
}
|
cr0.MP = 1;
|
||||||
|
cr4.OSFXSR = 1;
|
||||||
|
cr4.OSXMMEXCPT = 1;
|
||||||
|
|
||||||
|
CPUData *CoreData = GetCPU(Core);
|
||||||
|
CoreData->Data.FPU = (CPU::x64::FXState *)KernelAllocator.RequestPages(TO_PAGES(sizeof(CPU::x64::FXState)));
|
||||||
|
memset(CoreData->Data.FPU, 0, FROM_PAGES(TO_PAGES(sizeof(CPU::x64::FXState))));
|
||||||
|
CoreData->Data.FPU->mxcsr = 0b0001111110000000;
|
||||||
|
CoreData->Data.FPU->mxcsrmask = 0b1111111110111111;
|
||||||
|
CoreData->Data.FPU->fcw = 0b0000001100111111;
|
||||||
|
CPU::x64::fxrstor(CoreData->Data.FPU);
|
||||||
|
|
||||||
|
SSEEnableAfter = true;
|
||||||
|
}
|
||||||
|
|
||||||
if (!BSP)
|
if (!BSP)
|
||||||
KPrint("Enabling CPU cache.");
|
KPrint("Enabling CPU cache.");
|
||||||
@ -207,24 +259,26 @@ namespace CPU
|
|||||||
|
|
||||||
x64::writecr0(cr0);
|
x64::writecr0(cr0);
|
||||||
|
|
||||||
|
// FIXME: I don't think this is reporting correctly. This has to be fixed asap.
|
||||||
debug("Enabling UMIP, SMEP & SMAP support...");
|
debug("Enabling UMIP, SMEP & SMAP support...");
|
||||||
x64::cpuid(0x1, &rax, &rbx, &rcx, &rdx);
|
uint32_t eax, ebx, ecx, edx;
|
||||||
if (rdx & x64::CPUID_FEAT_RDX_UMIP) // https://en.wikipedia.org/wiki/Control_register
|
x64::cpuid(0x1, &eax, &ebx, &ecx, &edx);
|
||||||
|
if (edx & (1 << 2)) // https://en.wikipedia.org/wiki/Control_register
|
||||||
{
|
{
|
||||||
if (!BSP)
|
if (!BSP)
|
||||||
KPrint("UMIP is supported.");
|
KPrint("UMIP is supported.");
|
||||||
debug("UMIP is supported.");
|
debug("UMIP is supported.");
|
||||||
// cr4.UMIP = 1;
|
// cr4.UMIP = 1;
|
||||||
}
|
}
|
||||||
if (rdx & x64::CPUID_FEAT_RDX_SMEP) // https://en.wikipedia.org/wiki/Control_register#SMEP
|
if (edx & (1 << 7)) // https://en.wikipedia.org/wiki/Control_register#SMEP
|
||||||
// https://web.archive.org/web/20160312223150/http://ncsi.com/nsatc11/presentations/wednesday/emerging_technologies/fischer.pdf
|
// https://web.archive.org/web/20160312223150/http://ncsi.com/nsatc11/presentations/wednesday/emerging_technologies/fischer.pdf
|
||||||
{
|
{
|
||||||
if (!BSP)
|
if (!BSP)
|
||||||
KPrint("SMEP is supported.");
|
KPrint("SMEP is supported.");
|
||||||
debug("SMEP is supported.");
|
debug("SMEP is supported.");
|
||||||
// cr4.SMEP = 1;
|
// cr4.SMEP = 1;
|
||||||
}
|
}
|
||||||
if (rdx & x64::CPUID_FEAT_RDX_SMAP) // https://en.wikipedia.org/wiki/Supervisor_Mode_Access_Prevention
|
if (edx & (1 << 20)) // https://en.wikipedia.org/wiki/Supervisor_Mode_Access_Prevention
|
||||||
{
|
{
|
||||||
if (!BSP)
|
if (!BSP)
|
||||||
KPrint("SMAP is supported.");
|
KPrint("SMAP is supported.");
|
||||||
@ -252,6 +306,8 @@ namespace CPU
|
|||||||
x64::wrmsr(x64::MSR_CR_PAT, 0x6 | (0x0 << 8) | (0x1 << 16));
|
x64::wrmsr(x64::MSR_CR_PAT, 0x6 | (0x0 << 8) | (0x1 << 16));
|
||||||
if (!BSP++)
|
if (!BSP++)
|
||||||
trace("Features for BSP initialized.");
|
trace("Features for BSP initialized.");
|
||||||
|
if (SSEEnableAfter)
|
||||||
|
SSEEnabled = true;
|
||||||
#elif defined(__i386__)
|
#elif defined(__i386__)
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
#endif
|
#endif
|
||||||
@ -276,6 +332,14 @@ namespace CPU
|
|||||||
|
|
||||||
x86SIMDType CheckSIMD()
|
x86SIMDType CheckSIMD()
|
||||||
{
|
{
|
||||||
|
if (unlikely(!SSEEnabled))
|
||||||
|
return SIMD_NONE;
|
||||||
|
|
||||||
|
static x86SIMDType SIMDType = SIMD_NONE;
|
||||||
|
|
||||||
|
if (likely(SIMDType != SIMD_NONE))
|
||||||
|
return SIMDType;
|
||||||
|
|
||||||
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0)
|
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0)
|
||||||
{
|
{
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
@ -289,17 +353,32 @@ namespace CPU
|
|||||||
: "a"(0x1));
|
: "a"(0x1));
|
||||||
#endif
|
#endif
|
||||||
if (cpuid1amd.ECX.SSE4_2)
|
if (cpuid1amd.ECX.SSE4_2)
|
||||||
return SIMD_SSE42;
|
SIMDType = SIMD_SSE42;
|
||||||
else if (cpuid1amd.ECX.SSE4_1)
|
else if (cpuid1amd.ECX.SSE4_1)
|
||||||
return SIMD_SSE41;
|
SIMDType = SIMD_SSE41;
|
||||||
else if (cpuid1amd.ECX.SSE3)
|
else if (cpuid1amd.ECX.SSE3)
|
||||||
return SIMD_SSE3;
|
SIMDType = SIMD_SSE3;
|
||||||
else if (cpuid1amd.EDX.SSE2)
|
else if (cpuid1amd.EDX.SSE2)
|
||||||
return SIMD_SSE2;
|
SIMDType = SIMD_SSE2;
|
||||||
else if (cpuid1amd.EDX.SSE)
|
else if (cpuid1amd.EDX.SSE)
|
||||||
return SIMD_SSE;
|
SIMDType = SIMD_SSE;
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
if (cpuid1amd.ECX.SSE4_2)
|
||||||
|
debug("SSE4.2 is supported.");
|
||||||
|
if (cpuid1amd.ECX.SSE4_1)
|
||||||
|
debug("SSE4.1 is supported.");
|
||||||
|
if (cpuid1amd.ECX.SSE3)
|
||||||
|
debug("SSE3 is supported.");
|
||||||
|
if (cpuid1amd.EDX.SSE2)
|
||||||
|
debug("SSE2 is supported.");
|
||||||
|
if (cpuid1amd.EDX.SSE)
|
||||||
|
debug("SSE is supported.");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return SIMDType;
|
||||||
}
|
}
|
||||||
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
|
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
|
||||||
{
|
{
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
CPU::x64::Intel::CPUID0x1 cpuid1intel;
|
CPU::x64::Intel::CPUID0x1 cpuid1intel;
|
||||||
@ -312,15 +391,30 @@ namespace CPU
|
|||||||
: "a"(0x1));
|
: "a"(0x1));
|
||||||
#endif
|
#endif
|
||||||
if (cpuid1intel.ECX.SSE4_2)
|
if (cpuid1intel.ECX.SSE4_2)
|
||||||
return SIMD_SSE42;
|
SIMDType = SIMD_SSE42;
|
||||||
else if (cpuid1intel.ECX.SSE4_1)
|
else if (cpuid1intel.ECX.SSE4_1)
|
||||||
return SIMD_SSE41;
|
SIMDType = SIMD_SSE41;
|
||||||
else if (cpuid1intel.ECX.SSE3)
|
else if (cpuid1intel.ECX.SSE3)
|
||||||
return SIMD_SSE3;
|
SIMDType = SIMD_SSE3;
|
||||||
else if (cpuid1intel.EDX.SSE2)
|
else if (cpuid1intel.EDX.SSE2)
|
||||||
return SIMD_SSE2;
|
SIMDType = SIMD_SSE2;
|
||||||
else if (cpuid1intel.EDX.SSE)
|
else if (cpuid1intel.EDX.SSE)
|
||||||
return SIMD_SSE;
|
SIMDType = SIMD_SSE;
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
if (cpuid1intel.ECX.SSE4_2)
|
||||||
|
debug("SSE4.2 is supported.");
|
||||||
|
if (cpuid1intel.ECX.SSE4_1)
|
||||||
|
debug("SSE4.1 is supported.");
|
||||||
|
if (cpuid1intel.ECX.SSE3)
|
||||||
|
debug("SSE3 is supported.");
|
||||||
|
if (cpuid1intel.EDX.SSE2)
|
||||||
|
debug("SSE2 is supported.");
|
||||||
|
if (cpuid1intel.EDX.SSE)
|
||||||
|
debug("SSE is supported.");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return SIMDType;
|
||||||
}
|
}
|
||||||
|
|
||||||
return SIMD_NONE;
|
return SIMD_NONE;
|
||||||
@ -328,6 +422,9 @@ namespace CPU
|
|||||||
|
|
||||||
bool CheckSIMD(x86SIMDType Type)
|
bool CheckSIMD(x86SIMDType Type)
|
||||||
{
|
{
|
||||||
|
if (unlikely(!SSEEnabled))
|
||||||
|
return false;
|
||||||
|
|
||||||
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0)
|
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0)
|
||||||
{
|
{
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
@ -351,7 +448,7 @@ namespace CPU
|
|||||||
else if (Type == SIMD_SSE)
|
else if (Type == SIMD_SSE)
|
||||||
return cpuid1amd.EDX.SSE;
|
return cpuid1amd.EDX.SSE;
|
||||||
}
|
}
|
||||||
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
|
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
|
||||||
{
|
{
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
CPU::x64::Intel::CPUID0x1 cpuid1intel;
|
CPU::x64::Intel::CPUID0x1 cpuid1intel;
|
||||||
|
@ -22,7 +22,7 @@ namespace Random
|
|||||||
#endif
|
#endif
|
||||||
RDRANDFlag = cpuid1amd.ECX.RDRAND;
|
RDRANDFlag = cpuid1amd.ECX.RDRAND;
|
||||||
}
|
}
|
||||||
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
|
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
|
||||||
{
|
{
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
CPU::x64::Intel::CPUID0x1 cpuid1intel;
|
CPU::x64::Intel::CPUID0x1 cpuid1intel;
|
||||||
@ -71,7 +71,7 @@ namespace Random
|
|||||||
#endif
|
#endif
|
||||||
RDRANDFlag = cpuid1amd.ECX.RDRAND;
|
RDRANDFlag = cpuid1amd.ECX.RDRAND;
|
||||||
}
|
}
|
||||||
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
|
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
|
||||||
{
|
{
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
CPU::x64::Intel::CPUID0x1 cpuid1intel;
|
CPU::x64::Intel::CPUID0x1 cpuid1intel;
|
||||||
@ -120,7 +120,7 @@ namespace Random
|
|||||||
#endif
|
#endif
|
||||||
RDRANDFlag = cpuid1amd.ECX.RDRAND;
|
RDRANDFlag = cpuid1amd.ECX.RDRAND;
|
||||||
}
|
}
|
||||||
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
|
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
|
||||||
{
|
{
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
CPU::x64::Intel::CPUID0x1 cpuid1intel;
|
CPU::x64::Intel::CPUID0x1 cpuid1intel;
|
||||||
|
@ -129,24 +129,6 @@ namespace GraphicalUserInterface
|
|||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32_t DesktopFadeEffect[] = {
|
|
||||||
0xFF000000,
|
|
||||||
0x010101,
|
|
||||||
0x040404,
|
|
||||||
0x080808,
|
|
||||||
0x101010,
|
|
||||||
0x121212,
|
|
||||||
0x151515,
|
|
||||||
0x181818,
|
|
||||||
0x1A1A1A,
|
|
||||||
0x1D1D1D,
|
|
||||||
0x1F1F1F,
|
|
||||||
0x222222,
|
|
||||||
0x242424,
|
|
||||||
0x262626,
|
|
||||||
0x282828,
|
|
||||||
};
|
|
||||||
|
|
||||||
char CloseButton[] = {
|
char CloseButton[] = {
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
|
||||||
@ -262,22 +244,6 @@ namespace GraphicalUserInterface
|
|||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32_t CloseButtonFade[] = {
|
|
||||||
0x404040,
|
|
||||||
0x770000,
|
|
||||||
0x990000,
|
|
||||||
0xBB0000,
|
|
||||||
0xDD0000,
|
|
||||||
0xFF0000,
|
|
||||||
};
|
|
||||||
|
|
||||||
uint32_t MaximizeMinimizeButtonFade[] = {
|
|
||||||
0x404040,
|
|
||||||
0x454545,
|
|
||||||
0x505050,
|
|
||||||
0x5F5F5F,
|
|
||||||
};
|
|
||||||
|
|
||||||
O1 void GUI::FetchInputs()
|
O1 void GUI::FetchInputs()
|
||||||
{
|
{
|
||||||
KernelCallback callback;
|
KernelCallback callback;
|
||||||
|
@ -34,6 +34,12 @@
|
|||||||
* CREDITS AND REFERENCES:
|
* CREDITS AND REFERENCES:
|
||||||
* - General:
|
* - General:
|
||||||
* https://wiki.osdev.org/Main_Page
|
* https://wiki.osdev.org/Main_Page
|
||||||
|
*
|
||||||
|
* - CPU XCR0 structure:
|
||||||
|
* https://wiki.osdev.org/CPU_Registers_x86#XCR0
|
||||||
|
*
|
||||||
|
* - CPUID 0x7:
|
||||||
|
* https://en.wikipedia.org/wiki/CPUID
|
||||||
*
|
*
|
||||||
* - Network:
|
* - Network:
|
||||||
* https://web.archive.org/web/20051210132103/http://users.pcnet.ro/dmoroian/beej/Beej.html
|
* https://web.archive.org/web/20051210132103/http://users.pcnet.ro/dmoroian/beej/Beej.html
|
||||||
@ -123,7 +129,7 @@ EXTERNC __no_instrument_function void Main(BootInfo *Info)
|
|||||||
KPrint("Initializing GDT and IDT");
|
KPrint("Initializing GDT and IDT");
|
||||||
Interrupts::Initialize(0);
|
Interrupts::Initialize(0);
|
||||||
KPrint("Initializing CPU Features");
|
KPrint("Initializing CPU Features");
|
||||||
CPU::InitializeFeatures();
|
CPU::InitializeFeatures(0);
|
||||||
KPrint("Loading Kernel Symbols");
|
KPrint("Loading Kernel Symbols");
|
||||||
KernelSymbolTable = new SymbolResolver::Symbols((uintptr_t)Info->Kernel.FileBase);
|
KernelSymbolTable = new SymbolResolver::Symbols((uintptr_t)Info->Kernel.FileBase);
|
||||||
KPrint("Reading Kernel Parameters");
|
KPrint("Reading Kernel Parameters");
|
||||||
|
@ -3,346 +3,191 @@
|
|||||||
#include <memory.hpp>
|
#include <memory.hpp>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <debug.h>
|
#include <debug.h>
|
||||||
|
#include <cpu.hpp>
|
||||||
|
|
||||||
// TODO: Replace mem* with assembly code
|
/**
 * Copy n bytes from src to dest using SSE `movaps` for the bulk of the data.
 *
 * When both pointers are 16-byte aligned, the buffer is copied in 16-byte
 * chunks through xmm0; whatever is left (or the whole buffer, when either
 * pointer is unaligned) is handed to memcpy_unsafe().
 *
 * @param dest Destination buffer.
 * @param src  Source buffer.
 * @param n    Number of bytes to copy.
 * @return dest
 */
EXTERNC void *memcpy_sse(void *dest, const void *src, size_t n)
{
    char *d = (char *)dest;
    const char *s = (const char *)src;

    /* movaps faults on unaligned addresses, so only take the vector path
       when BOTH pointers are 16-byte aligned. */
    if ((((uintptr_t)d | (uintptr_t)s) & 0xF) == 0)
    {
        size_t num_vectors = n / 16;
        for (size_t i = 0; i < num_vectors; i++)
        {
            /* The asm reads *s and writes *d, but only the pointer VALUES
               are listed as operands. The "memory" clobber tells the
               compiler that memory is accessed, so it does not reorder or
               cache loads/stores across this statement. */
            asmv("movaps (%0), %%xmm0\n"
                 "movaps %%xmm0, (%1)\n"
                 :
                 : "r"(s), "r"(d)
                 : "xmm0", "memory");
            d += 16;
            s += 16;
        }

        n -= num_vectors * 16;
    }

    /* Tail (n % 16 bytes) or the full unaligned buffer. */
    memcpy_unsafe(d, s, n);
    return dest;
}
|
||||||
|
|
||||||
|
/**
 * Copy n bytes from src to dest using SSE2 `movdqa` for the bulk of the data.
 *
 * When both pointers are 16-byte aligned, the buffer is copied in 16-byte
 * chunks through xmm0; the remainder (or everything, when either pointer is
 * unaligned) is handed to memcpy_unsafe().
 *
 * @param dest Destination buffer.
 * @param src  Source buffer.
 * @param n    Number of bytes to copy.
 * @return dest
 */
EXTERNC void *memcpy_sse2(void *dest, const void *src, size_t n)
{
    char *d = (char *)dest;
    const char *s = (const char *)src;

    /* movdqa requires 16-byte aligned operands; check both pointers at once. */
    if ((((uintptr_t)d | (uintptr_t)s) & 0xF) == 0)
    {
        size_t num_vectors = n / 16;
        for (size_t i = 0; i < num_vectors; i++)
        {
            /* "memory" clobber: the asm dereferences s and d, which the
               compiler cannot see from the register-only operand list. */
            asmv("movdqa (%0), %%xmm0\n"
                 "movdqa %%xmm0, (%1)\n"
                 :
                 : "r"(s), "r"(d)
                 : "xmm0", "memory");
            d += 16;
            s += 16;
        }

        n -= num_vectors * 16;
    }

    /* Tail (n % 16 bytes) or the full unaligned buffer. */
    memcpy_unsafe(d, s, n);
    return dest;
}
|
||||||
|
|
||||||
|
/**
 * Copy n bytes from src to dest in 8-byte chunks using `movq` / `movddup`.
 *
 * When both pointers are 8-byte aligned, each iteration loads 8 bytes into
 * xmm0, duplicates them into xmm1 with the SSE3 `movddup` instruction and
 * stores the low 8 bytes of xmm1 to the destination. The remainder (or
 * everything, when either pointer is unaligned) is handed to memcpy_unsafe().
 *
 * @param dest Destination buffer.
 * @param src  Source buffer.
 * @param n    Number of bytes to copy.
 * @return dest
 */
EXTERNC void *memcpy_sse3(void *dest, const void *src, size_t n)
{
    char *d = (char *)dest;
    const char *s = (const char *)src;

    /* Vector path only when both pointers are 8-byte aligned. */
    if ((((uintptr_t)d | (uintptr_t)s) & 0x7) == 0)
    {
        size_t num_vectors = n / 8;
        for (size_t i = 0; i < num_vectors; i++)
        {
            /* Only the low quadword of xmm1 is stored, so the duplication
               done by movddup does not affect the copied bytes.
               "memory" clobber: the asm dereferences s and d, which the
               compiler cannot see from the register-only operand list. */
            asmv("movq (%0), %%xmm0\n"
                 "movddup %%xmm0, %%xmm1\n"
                 "movq %%xmm1, (%1)\n"
                 :
                 : "r"(s), "r"(d)
                 : "xmm0", "xmm1", "memory");
            d += 8;
            s += 8;
        }

        n -= num_vectors * 8;
    }

    /* Tail (n % 8 bytes) or the full unaligned buffer. */
    memcpy_unsafe(d, s, n);
    return dest;
}
|
||||||
|
|
||||||
void *memset_unsafe(void *dest, int c, size_t n)
|
/**
 * Copy n bytes from src to dest in 16-byte chunks (SSSE3 dispatch target).
 *
 * When both pointers are 16-byte aligned, the buffer is copied in 16-byte
 * chunks through xmm0; the remainder (or everything, when either pointer is
 * unaligned) is handed to memcpy_unsafe().
 *
 * NOTE: the previous body loaded src[0..15] and src[16..31] and combined
 * them with `palignr $8`, which produces src[8..23]. Every chunk was
 * therefore written with an 8-byte skew, and the second load read 16 bytes
 * past the end of the source on the last iteration. On this path source and
 * destination share the same 16-byte alignment, so no realignment is needed
 * and a plain aligned load/store is the correct copy.
 *
 * @param dest Destination buffer.
 * @param src  Source buffer.
 * @param n    Number of bytes to copy.
 * @return dest
 */
EXTERNC void *memcpy_ssse3(void *dest, const void *src, size_t n)
{
    char *d = (char *)dest;
    const char *s = (const char *)src;

    /* movdqa requires 16-byte aligned operands; check both pointers at once. */
    if ((((uintptr_t)d | (uintptr_t)s) & 0xF) == 0)
    {
        size_t num_vectors = n / 16;
        for (size_t i = 0; i < num_vectors; i++)
        {
            /* "memory" clobber: the asm dereferences s and d, which the
               compiler cannot see from the register-only operand list. */
            asmv("movdqa (%0), %%xmm0\n"
                 "movdqa %%xmm0, (%1)\n"
                 :
                 : "r"(s), "r"(d)
                 : "xmm0", "memory");
            d += 16;
            s += 16;
        }

        n -= num_vectors * 16;
    }

    /* Tail (n % 16 bytes) or the full unaligned buffer. */
    memcpy_unsafe(d, s, n);
    return dest;
}
|
||||||
|
|
||||||
void *memmove_unsafe(void *dest, const void *src, size_t n)
|
EXTERNC void *memcpy_sse4_1(void *dest, const void *src, size_t n)
|
||||||
{
|
{
|
||||||
#ifdef __GNUC__
|
CPU::__m128i *d = (CPU::__m128i *)dest;
|
||||||
typedef __attribute__((__may_alias__)) size_t WT;
|
const CPU::__m128i *s = (const CPU::__m128i *)src;
|
||||||
#define WS (sizeof(WT))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
char *d = dest;
|
if ((((uintptr_t)d | (uintptr_t)s) & 0xF) == 0)
|
||||||
const char *s = src;
|
|
||||||
|
|
||||||
if (d == s)
|
|
||||||
return d;
|
|
||||||
if ((uintptr_t)s - (uintptr_t)d - n <= -2 * n)
|
|
||||||
return memcpy(d, s, n);
|
|
||||||
|
|
||||||
if (d < s)
|
|
||||||
{
|
{
|
||||||
#ifdef __GNUC__
|
size_t num_vectors = n / 16;
|
||||||
if ((uintptr_t)s % WS == (uintptr_t)d % WS)
|
for (size_t i = 0; i < num_vectors; i++)
|
||||||
{
|
{
|
||||||
while ((uintptr_t)d % WS)
|
// movntdqa
|
||||||
{
|
asmv("movdqa (%0), %%xmm0\n"
|
||||||
if (!n--)
|
"movdqa %%xmm0, (%1)\n"
|
||||||
return dest;
|
:
|
||||||
*d++ = *s++;
|
: "r"(s), "r"(d)
|
||||||
}
|
: "xmm0");
|
||||||
for (; n >= WS; n -= WS, d += WS, s += WS)
|
d += 16;
|
||||||
*(WT *)d = *(WT *)s;
|
s += 16;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
for (; n; n--)
|
n -= num_vectors * 16;
|
||||||
*d++ = *s++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
#ifdef __GNUC__
|
|
||||||
if ((uintptr_t)s % WS == (uintptr_t)d % WS)
|
|
||||||
{
|
|
||||||
while ((uintptr_t)(d + n) % WS)
|
|
||||||
{
|
|
||||||
if (!n--)
|
|
||||||
return dest;
|
|
||||||
d[n] = s[n];
|
|
||||||
}
|
|
||||||
while (n >= WS)
|
|
||||||
n -= WS, *(WT *)(d + n) = *(WT *)(s + n);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
while (n)
|
|
||||||
n--, d[n] = s[n];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
memcpy_unsafe(d, s, n);
|
||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
int memcmp(const void *vl, const void *vr, size_t n)
|
EXTERNC void *memcpy_sse4_2(void *dest, const void *src, size_t n)
|
||||||
{
|
{
|
||||||
const unsigned char *l = vl, *r = vr;
|
char *d = (char *)dest;
|
||||||
|
const char *s = (const char *)src;
|
||||||
|
|
||||||
|
if ((((uintptr_t)d | (uintptr_t)s) & 0xF) == 0)
|
||||||
|
{
|
||||||
|
size_t num_vectors = n / 16;
|
||||||
|
for (size_t i = 0; i < num_vectors; i++)
|
||||||
|
{
|
||||||
|
asmv("movdqa (%0), %%xmm0\n"
|
||||||
|
"pcmpistri $0, (%0), %%xmm0\n"
|
||||||
|
"movdqa %%xmm0, (%1)\n"
|
||||||
|
:
|
||||||
|
: "r"(s), "r"(d)
|
||||||
|
: "xmm0");
|
||||||
|
d += 16;
|
||||||
|
s += 16;
|
||||||
|
}
|
||||||
|
|
||||||
|
n -= num_vectors * 16;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy_unsafe(d, s, n);
|
||||||
|
return dest;
|
||||||
|
}
|
||||||
|
|
||||||
|
EXTERNC int memcmp(const void *vl, const void *vr, size_t n)
|
||||||
|
{
|
||||||
|
const unsigned char *l = (unsigned char *)vl, *r = (unsigned char *)vr;
|
||||||
for (; n && *l == *r; n--, l++, r++)
|
for (; n && *l == *r; n--, l++, r++)
|
||||||
;
|
;
|
||||||
return n ? *l - *r : 0;
|
return n ? *l - *r : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void backspace(char s[])
|
EXTERNC void backspace(char s[])
|
||||||
{
|
{
|
||||||
int len = strlen(s);
|
int len = strlen(s);
|
||||||
s[len - 1] = '\0';
|
s[len - 1] = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
void append(char s[], char n)
|
EXTERNC void append(char s[], char n)
|
||||||
{
|
{
|
||||||
int len = strlen(s);
|
int len = strlen(s);
|
||||||
s[len] = n;
|
s[len] = n;
|
||||||
s[len + 1] = '\0';
|
s[len + 1] = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
int strncmp(const char *s1, const char *s2, size_t n)
|
EXTERNC int strncmp(const char *s1, const char *s2, size_t n)
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < n; i++)
|
for (size_t i = 0; i < n; i++)
|
||||||
{
|
{
|
||||||
@ -355,7 +200,7 @@ int strncmp(const char *s1, const char *s2, size_t n)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
long unsigned strlen(const char s[])
|
EXTERNC long unsigned strlen(const char s[])
|
||||||
{
|
{
|
||||||
long unsigned i = 0;
|
long unsigned i = 0;
|
||||||
if (s)
|
if (s)
|
||||||
@ -364,7 +209,7 @@ long unsigned strlen(const char s[])
|
|||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *strcat_unsafe(char *destination, const char *source)
|
EXTERNC char *strcat_unsafe(char *destination, const char *source)
|
||||||
{
|
{
|
||||||
if ((destination == NULL) || (source == NULL))
|
if ((destination == NULL) || (source == NULL))
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -381,7 +226,7 @@ char *strcat_unsafe(char *destination, const char *source)
|
|||||||
return destination;
|
return destination;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *strcpy_unsafe(char *destination, const char *source)
|
EXTERNC char *strcpy_unsafe(char *destination, const char *source)
|
||||||
{
|
{
|
||||||
if (destination == NULL)
|
if (destination == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -396,7 +241,7 @@ char *strcpy_unsafe(char *destination, const char *source)
|
|||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *strncpy(char *destination, const char *source, unsigned long num)
|
EXTERNC char *strncpy(char *destination, const char *source, unsigned long num)
|
||||||
{
|
{
|
||||||
if (destination == NULL)
|
if (destination == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -411,14 +256,14 @@ char *strncpy(char *destination, const char *source, unsigned long num)
|
|||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
int strcmp(const char *l, const char *r)
|
EXTERNC int strcmp(const char *l, const char *r)
|
||||||
{
|
{
|
||||||
for (; *l == *r && *l; l++, r++)
|
for (; *l == *r && *l; l++, r++)
|
||||||
;
|
;
|
||||||
return *(unsigned char *)l - *(unsigned char *)r;
|
return *(unsigned char *)l - *(unsigned char *)r;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *strstr(const char *haystack, const char *needle)
|
EXTERNC char *strstr(const char *haystack, const char *needle)
|
||||||
{
|
{
|
||||||
const char *a = haystack, *b = needle;
|
const char *a = haystack, *b = needle;
|
||||||
while (1)
|
while (1)
|
||||||
@ -435,7 +280,7 @@ char *strstr(const char *haystack, const char *needle)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
char *strchr(const char *String, int Char)
|
EXTERNC char *strchr(const char *String, int Char)
|
||||||
{
|
{
|
||||||
while (*String != (char)Char)
|
while (*String != (char)Char)
|
||||||
{
|
{
|
||||||
@ -445,24 +290,24 @@ char *strchr(const char *String, int Char)
|
|||||||
return (char *)String;
|
return (char *)String;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *strdup(const char *String)
|
EXTERNC char *strdup(const char *String)
|
||||||
{
|
{
|
||||||
char *OutBuffer = kmalloc(strlen((char *)String) + 1);
|
char *OutBuffer = (char *)kmalloc(strlen((char *)String) + 1);
|
||||||
strncpy(OutBuffer, String, strlen(String) + 1);
|
strncpy(OutBuffer, String, strlen(String) + 1);
|
||||||
return OutBuffer;
|
return OutBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
int isalpha(int c)
|
EXTERNC int isalpha(int c)
|
||||||
{
|
{
|
||||||
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
|
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
|
||||||
}
|
}
|
||||||
|
|
||||||
int isupper(int c)
|
EXTERNC int isupper(int c)
|
||||||
{
|
{
|
||||||
return (c >= 'A' && c <= 'Z');
|
return (c >= 'A' && c <= 'Z');
|
||||||
}
|
}
|
||||||
|
|
||||||
long int strtol(const char *str, char **endptr, int base)
|
EXTERNC long int strtol(const char *str, char **endptr, int base)
|
||||||
{
|
{
|
||||||
const char *s;
|
const char *s;
|
||||||
long acc, cutoff;
|
long acc, cutoff;
|
||||||
@ -527,7 +372,7 @@ long int strtol(const char *str, char **endptr, int base)
|
|||||||
return (acc);
|
return (acc);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long int strtoul(const char *str, char **endptr, int base)
|
EXTERNC unsigned long int strtoul(const char *str, char **endptr, int base)
|
||||||
{
|
{
|
||||||
const char *s;
|
const char *s;
|
||||||
unsigned long acc, cutoff;
|
unsigned long acc, cutoff;
|
||||||
@ -592,17 +437,17 @@ unsigned long int strtoul(const char *str, char **endptr, int base)
|
|||||||
return (acc);
|
return (acc);
|
||||||
}
|
}
|
||||||
|
|
||||||
int isdigit(int c)
|
EXTERNC int isdigit(int c)
|
||||||
{
|
{
|
||||||
return c >= '0' && c <= '9';
|
return c >= '0' && c <= '9';
|
||||||
}
|
}
|
||||||
|
|
||||||
int isspace(int c)
|
EXTERNC int isspace(int c)
|
||||||
{
|
{
|
||||||
return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == '\v';
|
return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == '\v';
|
||||||
}
|
}
|
||||||
|
|
||||||
int isempty(char *str)
|
EXTERNC int isempty(char *str)
|
||||||
{
|
{
|
||||||
if (strlen(str) == 0)
|
if (strlen(str) == 0)
|
||||||
return 1;
|
return 1;
|
||||||
@ -615,7 +460,7 @@ int isempty(char *str)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int isdelim(char c, char *delim)
|
EXTERNC unsigned int isdelim(char c, char *delim)
|
||||||
{
|
{
|
||||||
while (*delim != '\0')
|
while (*delim != '\0')
|
||||||
{
|
{
|
||||||
@ -626,23 +471,23 @@ unsigned int isdelim(char c, char *delim)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int abs(int i) { return i < 0 ? -i : i; }
|
EXTERNC int abs(int i) { return i < 0 ? -i : i; }
|
||||||
|
|
||||||
void swap(char *x, char *y)
|
EXTERNC void swap(char *x, char *y)
|
||||||
{
|
{
|
||||||
char t = *x;
|
char t = *x;
|
||||||
*x = *y;
|
*x = *y;
|
||||||
*y = t;
|
*y = t;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *reverse(char *Buffer, int i, int j)
|
EXTERNC char *reverse(char *Buffer, int i, int j)
|
||||||
{
|
{
|
||||||
while (i < j)
|
while (i < j)
|
||||||
swap(&Buffer[i++], &Buffer[j--]);
|
swap(&Buffer[i++], &Buffer[j--]);
|
||||||
return Buffer;
|
return Buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
float sqrtf(float x)
|
EXTERNC float sqrtf(float x)
|
||||||
{
|
{
|
||||||
if (x < 0.0f)
|
if (x < 0.0f)
|
||||||
return NAN;
|
return NAN;
|
||||||
@ -660,7 +505,7 @@ float sqrtf(float x)
|
|||||||
return guess;
|
return guess;
|
||||||
}
|
}
|
||||||
|
|
||||||
double clamp(double x, double low, double high)
|
EXTERNC double clamp(double x, double low, double high)
|
||||||
{
|
{
|
||||||
if (x < low)
|
if (x < low)
|
||||||
return low;
|
return low;
|
||||||
@ -670,25 +515,25 @@ double clamp(double x, double low, double high)
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
float lerp(float a, float b, float t)
|
EXTERNC float lerp(float a, float b, float t)
|
||||||
{
|
{
|
||||||
return (1 - t) * a + t * b;
|
return (1 - t) * a + t * b;
|
||||||
}
|
}
|
||||||
|
|
||||||
float smoothstep(float a, float b, float t)
|
EXTERNC float smoothstep(float a, float b, float t)
|
||||||
{
|
{
|
||||||
t = clamp(t, 0.0, 1.0);
|
t = clamp(t, 0.0, 1.0);
|
||||||
return lerp(a, b, t * t * (3 - 2 * t));
|
return lerp(a, b, t * t * (3 - 2 * t));
|
||||||
}
|
}
|
||||||
|
|
||||||
float cubicInterpolate(float a, float b, float t)
|
EXTERNC float cubicInterpolate(float a, float b, float t)
|
||||||
{
|
{
|
||||||
float t2 = t * t;
|
float t2 = t * t;
|
||||||
float t3 = t2 * t;
|
float t3 = t2 * t;
|
||||||
return a + (-2 * t3 + 3 * t2) * b;
|
return a + (-2 * t3 + 3 * t2) * b;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *strtok(char *src, const char *delim)
|
EXTERNC char *strtok(char *src, const char *delim)
|
||||||
{
|
{
|
||||||
static char *src1;
|
static char *src1;
|
||||||
if (!src)
|
if (!src)
|
||||||
@ -728,7 +573,7 @@ char *strtok(char *src, const char *delim)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
int atoi(const char *String)
|
EXTERNC int atoi(const char *String)
|
||||||
{
|
{
|
||||||
uint64_t Length = strlen((char *)String);
|
uint64_t Length = strlen((char *)String);
|
||||||
uint64_t OutBuffer = 0;
|
uint64_t OutBuffer = 0;
|
||||||
@ -741,7 +586,7 @@ int atoi(const char *String)
|
|||||||
return OutBuffer;
|
return OutBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
double atof(const char *String)
|
EXTERNC double atof(const char *String)
|
||||||
{
|
{
|
||||||
// Originally from https://github.com/GaloisInc/minlibc/blob/master/atof.c
|
// Originally from https://github.com/GaloisInc/minlibc/blob/master/atof.c
|
||||||
/*
|
/*
|
||||||
@ -823,7 +668,7 @@ double atof(const char *String)
|
|||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *itoa(int Value, char *Buffer, int Base)
|
EXTERNC char *itoa(int Value, char *Buffer, int Base)
|
||||||
{
|
{
|
||||||
if (Base < 2 || Base > 32)
|
if (Base < 2 || Base > 32)
|
||||||
return Buffer;
|
return Buffer;
|
||||||
@ -851,7 +696,7 @@ char *itoa(int Value, char *Buffer, int Base)
|
|||||||
return reverse(Buffer, 0, i - 1);
|
return reverse(Buffer, 0, i - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
char *ltoa(long Value, char *Buffer, int Base)
|
EXTERNC char *ltoa(long Value, char *Buffer, int Base)
|
||||||
{
|
{
|
||||||
if (Base < 2 || Base > 32)
|
if (Base < 2 || Base > 32)
|
||||||
return Buffer;
|
return Buffer;
|
||||||
@ -879,7 +724,7 @@ char *ltoa(long Value, char *Buffer, int Base)
|
|||||||
return reverse(Buffer, 0, i - 1);
|
return reverse(Buffer, 0, i - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
char *ultoa(unsigned long Value, char *Buffer, int Base)
|
EXTERNC char *ultoa(unsigned long Value, char *Buffer, int Base)
|
||||||
{
|
{
|
||||||
if (Base < 2 || Base > 32)
|
if (Base < 2 || Base > 32)
|
||||||
return Buffer;
|
return Buffer;
|
||||||
@ -904,7 +749,7 @@ char *ultoa(unsigned long Value, char *Buffer, int Base)
|
|||||||
return reverse(Buffer, 0, i - 1);
|
return reverse(Buffer, 0, i - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern void __chk_fail(void) __attribute__((__noreturn__));
|
EXTERNC void __chk_fail(void) __attribute__((__noreturn__));
|
||||||
|
|
||||||
__noreturn static inline void __convert_chk_fail(void)
|
__noreturn static inline void __convert_chk_fail(void)
|
||||||
{
|
{
|
||||||
@ -918,7 +763,7 @@ __noreturn static inline void __convert_chk_fail(void)
|
|||||||
|
|
||||||
// #define DBG_CHK 1
|
// #define DBG_CHK 1
|
||||||
|
|
||||||
__no_stack_protector void *__memcpy_chk(void *dest, const void *src, size_t len, size_t slen)
|
EXTERNC __no_stack_protector void *__memcpy_chk(void *dest, const void *src, size_t len, size_t slen)
|
||||||
{
|
{
|
||||||
#ifdef DBG_CHK
|
#ifdef DBG_CHK
|
||||||
debug("( dest:%#lx src:%#lx len:%llu slen:%llu )", dest, src, len, slen);
|
debug("( dest:%#lx src:%#lx len:%llu slen:%llu )", dest, src, len, slen);
|
||||||
@ -949,10 +794,36 @@ __no_stack_protector void *__memcpy_chk(void *dest, const void *src, size_t len,
|
|||||||
|
|
||||||
if (unlikely(len > slen))
|
if (unlikely(len > slen))
|
||||||
__chk_fail();
|
__chk_fail();
|
||||||
return memcpy_unsafe(dest, src, len);
|
|
||||||
|
switch (CPU::CheckSIMD())
|
||||||
|
{
|
||||||
|
case CPU::x86SIMDType::SIMD_SSE:
|
||||||
|
return memcpy_sse(dest, src, len);
|
||||||
|
break;
|
||||||
|
case CPU::x86SIMDType::SIMD_SSE2:
|
||||||
|
return memcpy_sse2(dest, src, len);
|
||||||
|
break;
|
||||||
|
case CPU::x86SIMDType::SIMD_SSE3:
|
||||||
|
return memcpy_sse3(dest, src, len);
|
||||||
|
break;
|
||||||
|
case CPU::x86SIMDType::SIMD_SSSE3:
|
||||||
|
return memcpy_ssse3(dest, src, len);
|
||||||
|
break;
|
||||||
|
case CPU::x86SIMDType::SIMD_SSE41:
|
||||||
|
return memcpy_sse4_1(dest, src, len);
|
||||||
|
break;
|
||||||
|
case CPU::x86SIMDType::SIMD_SSE42:
|
||||||
|
return memcpy_sse4_2(dest, src, len);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return memcpy_unsafe(dest, src, len);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
error("Should not be here!");
|
||||||
|
CPU::Stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
__no_stack_protector void *__memset_chk(void *dest, int val, size_t len, size_t slen)
|
EXTERNC __no_stack_protector void *__memset_chk(void *dest, int val, size_t len, size_t slen)
|
||||||
{
|
{
|
||||||
#ifdef DBG_CHK
|
#ifdef DBG_CHK
|
||||||
debug("( dest:%#lx val:%#x len:%llu slen:%llu )", dest, val, len, slen);
|
debug("( dest:%#lx val:%#x len:%llu slen:%llu )", dest, val, len, slen);
|
||||||
@ -980,7 +851,7 @@ __no_stack_protector void *__memset_chk(void *dest, int val, size_t len, size_t
|
|||||||
return memset_unsafe(dest, val, len);
|
return memset_unsafe(dest, val, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
__no_stack_protector void *__memmove_chk(void *dest, const void *src, size_t len, size_t slen)
|
EXTERNC __no_stack_protector void *__memmove_chk(void *dest, const void *src, size_t len, size_t slen)
|
||||||
{
|
{
|
||||||
#ifdef DBG_CHK
|
#ifdef DBG_CHK
|
||||||
debug("( dest:%#lx src:%#lx len:%llu slen:%llu )", dest, src, len, slen);
|
debug("( dest:%#lx src:%#lx len:%llu slen:%llu )", dest, src, len, slen);
|
||||||
@ -1014,7 +885,7 @@ __no_stack_protector void *__memmove_chk(void *dest, const void *src, size_t len
|
|||||||
return memmove_unsafe(dest, src, len);
|
return memmove_unsafe(dest, src, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
__no_stack_protector char *__strcat_chk(char *dest, const char *src, size_t slen)
|
EXTERNC __no_stack_protector char *__strcat_chk(char *dest, const char *src, size_t slen)
|
||||||
{
|
{
|
||||||
#ifdef DBG_CHK
|
#ifdef DBG_CHK
|
||||||
debug("( dest:%#lx src:%#lx slen:%llu )", dest, src, slen);
|
debug("( dest:%#lx src:%#lx slen:%llu )", dest, src, slen);
|
||||||
@ -1043,7 +914,7 @@ __no_stack_protector char *__strcat_chk(char *dest, const char *src, size_t slen
|
|||||||
return strcat_unsafe(dest, src);
|
return strcat_unsafe(dest, src);
|
||||||
}
|
}
|
||||||
|
|
||||||
__no_stack_protector char *__strcpy_chk(char *dest, const char *src, size_t slen)
|
EXTERNC __no_stack_protector char *__strcpy_chk(char *dest, const char *src, size_t slen)
|
||||||
{
|
{
|
||||||
#ifdef DBG_CHK
|
#ifdef DBG_CHK
|
||||||
debug("( dest:%#lx src:%#lx slen:%llu )", dest, src, slen);
|
debug("( dest:%#lx src:%#lx slen:%llu )", dest, src, slen);
|
||||||
@ -1070,5 +941,6 @@ __no_stack_protector char *__strcpy_chk(char *dest, const char *src, size_t slen
|
|||||||
|
|
||||||
if (unlikely(len >= slen))
|
if (unlikely(len >= slen))
|
||||||
__chk_fail();
|
__chk_fail();
|
||||||
|
|
||||||
return strcpy_unsafe(dest, src);
|
return strcpy_unsafe(dest, src);
|
||||||
}
|
}
|
320
Library/memcpy.c
Normal file
320
Library/memcpy.c
Normal file
@ -0,0 +1,320 @@
|
|||||||
|
#include <convert.h>
|
||||||
|
|
||||||
|
#include <memory.hpp>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <debug.h>
|
||||||
|
|
||||||
|
/* Some of the functions are from musl library */
|
||||||
|
/* https://www.musl-libc.org/ */
|
||||||
|
/*
|
||||||
|
Copyright © 2005-2020 Rich Felker, et al.
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void *memcpy_unsafe(void *dest, const void *src, size_t n)
|
||||||
|
{
|
||||||
|
unsigned char *d = dest;
|
||||||
|
const unsigned char *s = src;
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
|
||||||
|
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||||
|
#define LS >>
|
||||||
|
#define RS <<
|
||||||
|
#else
|
||||||
|
#define LS <<
|
||||||
|
#define RS >>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef uint32_t __attribute__((__may_alias__)) u32;
|
||||||
|
uint32_t w, x;
|
||||||
|
|
||||||
|
for (; (uintptr_t)s % 4 && n; n--)
|
||||||
|
*d++ = *s++;
|
||||||
|
|
||||||
|
if ((uintptr_t)d % 4 == 0)
|
||||||
|
{
|
||||||
|
for (; n >= 16; s += 16, d += 16, n -= 16)
|
||||||
|
{
|
||||||
|
*(u32 *)(d + 0) = *(u32 *)(s + 0);
|
||||||
|
*(u32 *)(d + 4) = *(u32 *)(s + 4);
|
||||||
|
*(u32 *)(d + 8) = *(u32 *)(s + 8);
|
||||||
|
*(u32 *)(d + 12) = *(u32 *)(s + 12);
|
||||||
|
}
|
||||||
|
if (n & 8)
|
||||||
|
{
|
||||||
|
*(u32 *)(d + 0) = *(u32 *)(s + 0);
|
||||||
|
*(u32 *)(d + 4) = *(u32 *)(s + 4);
|
||||||
|
d += 8;
|
||||||
|
s += 8;
|
||||||
|
}
|
||||||
|
if (n & 4)
|
||||||
|
{
|
||||||
|
*(u32 *)(d + 0) = *(u32 *)(s + 0);
|
||||||
|
d += 4;
|
||||||
|
s += 4;
|
||||||
|
}
|
||||||
|
if (n & 2)
|
||||||
|
{
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
}
|
||||||
|
if (n & 1)
|
||||||
|
{
|
||||||
|
*d = *s;
|
||||||
|
}
|
||||||
|
return dest;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (n >= 32)
|
||||||
|
switch ((uintptr_t)d % 4)
|
||||||
|
{
|
||||||
|
case 1:
|
||||||
|
w = *(u32 *)s;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
n -= 3;
|
||||||
|
for (; n >= 17; s += 16, d += 16, n -= 16)
|
||||||
|
{
|
||||||
|
x = *(u32 *)(s + 1);
|
||||||
|
*(u32 *)(d + 0) = (w LS 24) | (x RS 8);
|
||||||
|
w = *(u32 *)(s + 5);
|
||||||
|
*(u32 *)(d + 4) = (x LS 24) | (w RS 8);
|
||||||
|
x = *(u32 *)(s + 9);
|
||||||
|
*(u32 *)(d + 8) = (w LS 24) | (x RS 8);
|
||||||
|
w = *(u32 *)(s + 13);
|
||||||
|
*(u32 *)(d + 12) = (x LS 24) | (w RS 8);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
w = *(u32 *)s;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
n -= 2;
|
||||||
|
for (; n >= 18; s += 16, d += 16, n -= 16)
|
||||||
|
{
|
||||||
|
x = *(u32 *)(s + 2);
|
||||||
|
*(u32 *)(d + 0) = (w LS 16) | (x RS 16);
|
||||||
|
w = *(u32 *)(s + 6);
|
||||||
|
*(u32 *)(d + 4) = (x LS 16) | (w RS 16);
|
||||||
|
x = *(u32 *)(s + 10);
|
||||||
|
*(u32 *)(d + 8) = (w LS 16) | (x RS 16);
|
||||||
|
w = *(u32 *)(s + 14);
|
||||||
|
*(u32 *)(d + 12) = (x LS 16) | (w RS 16);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
w = *(u32 *)s;
|
||||||
|
*d++ = *s++;
|
||||||
|
n -= 1;
|
||||||
|
for (; n >= 19; s += 16, d += 16, n -= 16)
|
||||||
|
{
|
||||||
|
x = *(u32 *)(s + 3);
|
||||||
|
*(u32 *)(d + 0) = (w LS 8) | (x RS 24);
|
||||||
|
w = *(u32 *)(s + 7);
|
||||||
|
*(u32 *)(d + 4) = (x LS 8) | (w RS 24);
|
||||||
|
x = *(u32 *)(s + 11);
|
||||||
|
*(u32 *)(d + 8) = (w LS 8) | (x RS 24);
|
||||||
|
w = *(u32 *)(s + 15);
|
||||||
|
*(u32 *)(d + 12) = (x LS 8) | (w RS 24);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (n & 16)
|
||||||
|
{
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
}
|
||||||
|
if (n & 8)
|
||||||
|
{
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
}
|
||||||
|
if (n & 4)
|
||||||
|
{
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
}
|
||||||
|
if (n & 2)
|
||||||
|
{
|
||||||
|
*d++ = *s++;
|
||||||
|
*d++ = *s++;
|
||||||
|
}
|
||||||
|
if (n & 1)
|
||||||
|
{
|
||||||
|
*d = *s;
|
||||||
|
}
|
||||||
|
return dest;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (; n; n--)
|
||||||
|
*d++ = *s++;
|
||||||
|
return dest;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *memset_unsafe(void *dest, int c, size_t n)
|
||||||
|
{
|
||||||
|
unsigned char *s = dest;
|
||||||
|
size_t k;
|
||||||
|
|
||||||
|
if (!n)
|
||||||
|
return dest;
|
||||||
|
s[0] = c;
|
||||||
|
s[n - 1] = c;
|
||||||
|
if (n <= 2)
|
||||||
|
return dest;
|
||||||
|
s[1] = c;
|
||||||
|
s[2] = c;
|
||||||
|
s[n - 2] = c;
|
||||||
|
s[n - 3] = c;
|
||||||
|
if (n <= 6)
|
||||||
|
return dest;
|
||||||
|
s[3] = c;
|
||||||
|
s[n - 4] = c;
|
||||||
|
if (n <= 8)
|
||||||
|
return dest;
|
||||||
|
|
||||||
|
k = -(uintptr_t)s & 3;
|
||||||
|
s += k;
|
||||||
|
n -= k;
|
||||||
|
n &= -4;
|
||||||
|
|
||||||
|
#ifdef __GNUC__
|
||||||
|
typedef uint32_t __attribute__((__may_alias__)) u32;
|
||||||
|
typedef uint64_t __attribute__((__may_alias__)) u64;
|
||||||
|
|
||||||
|
u32 c32 = ((u32)-1) / 255 * (unsigned char)c;
|
||||||
|
*(u32 *)(s + 0) = c32;
|
||||||
|
*(u32 *)(s + n - 4) = c32;
|
||||||
|
if (n <= 8)
|
||||||
|
return dest;
|
||||||
|
*(u32 *)(s + 4) = c32;
|
||||||
|
*(u32 *)(s + 8) = c32;
|
||||||
|
*(u32 *)(s + n - 12) = c32;
|
||||||
|
*(u32 *)(s + n - 8) = c32;
|
||||||
|
if (n <= 24)
|
||||||
|
return dest;
|
||||||
|
*(u32 *)(s + 12) = c32;
|
||||||
|
*(u32 *)(s + 16) = c32;
|
||||||
|
*(u32 *)(s + 20) = c32;
|
||||||
|
*(u32 *)(s + 24) = c32;
|
||||||
|
*(u32 *)(s + n - 28) = c32;
|
||||||
|
*(u32 *)(s + n - 24) = c32;
|
||||||
|
*(u32 *)(s + n - 20) = c32;
|
||||||
|
*(u32 *)(s + n - 16) = c32;
|
||||||
|
|
||||||
|
k = 24 + ((uintptr_t)s & 4);
|
||||||
|
s += k;
|
||||||
|
n -= k;
|
||||||
|
|
||||||
|
u64 c64 = c32 | ((u64)c32 << 32);
|
||||||
|
for (; n >= 32; n -= 32, s += 32)
|
||||||
|
{
|
||||||
|
*(u64 *)(s + 0) = c64;
|
||||||
|
*(u64 *)(s + 8) = c64;
|
||||||
|
*(u64 *)(s + 16) = c64;
|
||||||
|
*(u64 *)(s + 24) = c64;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
for (; n; n--, s++)
|
||||||
|
*s = c;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return dest;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *memmove_unsafe(void *dest, const void *src, size_t n)
|
||||||
|
{
|
||||||
|
#ifdef __GNUC__
|
||||||
|
typedef __attribute__((__may_alias__)) size_t WT;
|
||||||
|
#define WS (sizeof(WT))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
char *d = dest;
|
||||||
|
const char *s = src;
|
||||||
|
|
||||||
|
if (d == s)
|
||||||
|
return d;
|
||||||
|
if ((uintptr_t)s - (uintptr_t)d - n <= -2 * n)
|
||||||
|
return memcpy(d, s, n);
|
||||||
|
|
||||||
|
if (d < s)
|
||||||
|
{
|
||||||
|
#ifdef __GNUC__
|
||||||
|
if ((uintptr_t)s % WS == (uintptr_t)d % WS)
|
||||||
|
{
|
||||||
|
while ((uintptr_t)d % WS)
|
||||||
|
{
|
||||||
|
if (!n--)
|
||||||
|
return dest;
|
||||||
|
*d++ = *s++;
|
||||||
|
}
|
||||||
|
for (; n >= WS; n -= WS, d += WS, s += WS)
|
||||||
|
*(WT *)d = *(WT *)s;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for (; n; n--)
|
||||||
|
*d++ = *s++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#ifdef __GNUC__
|
||||||
|
if ((uintptr_t)s % WS == (uintptr_t)d % WS)
|
||||||
|
{
|
||||||
|
while ((uintptr_t)(d + n) % WS)
|
||||||
|
{
|
||||||
|
if (!n--)
|
||||||
|
return dest;
|
||||||
|
d[n] = s[n];
|
||||||
|
}
|
||||||
|
while (n >= WS)
|
||||||
|
n -= WS, *(WT *)(d + n) = *(WT *)(s + n);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
while (n)
|
||||||
|
n--, d[n] = s[n];
|
||||||
|
}
|
||||||
|
|
||||||
|
return dest;
|
||||||
|
}
|
@ -788,8 +788,8 @@ namespace Tasking
|
|||||||
Thread->ExitCode = 0xdead;
|
Thread->ExitCode = 0xdead;
|
||||||
Thread->Status = TaskStatus::Ready;
|
Thread->Status = TaskStatus::Ready;
|
||||||
Thread->Memory = new Memory::MemMgr(Parent->PageTable);
|
Thread->Memory = new Memory::MemMgr(Parent->PageTable);
|
||||||
Thread->FPU = (FXState *)Thread->Memory->RequestPages(TO_PAGES(sizeof(FXState)));
|
Thread->FPU = (CPU::x64::FXState *)Thread->Memory->RequestPages(TO_PAGES(sizeof(CPU::x64::FXState)));
|
||||||
memset(Thread->FPU, 0, FROM_PAGES(TO_PAGES(sizeof(FXState))));
|
memset(Thread->FPU, 0, FROM_PAGES(TO_PAGES(sizeof(CPU::x64::FXState))));
|
||||||
|
|
||||||
// TODO: Is really a good idea to use the FPU in kernel mode?
|
// TODO: Is really a good idea to use the FPU in kernel mode?
|
||||||
Thread->FPU->mxcsr = 0b0001111110000000;
|
Thread->FPU->mxcsr = 0b0001111110000000;
|
||||||
@ -1169,10 +1169,37 @@ namespace Tasking
|
|||||||
debug("Created Kernel Process: %s and Thread: %s", kproc->Name, kthrd->Name);
|
debug("Created Kernel Process: %s and Thread: %s", kproc->Name, kthrd->Name);
|
||||||
TaskingLock.Lock(__FUNCTION__);
|
TaskingLock.Lock(__FUNCTION__);
|
||||||
|
|
||||||
|
bool MONITORSupported = false;
|
||||||
|
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0)
|
||||||
|
{
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
uint32_t rax, rbx, rcx, rdx;
|
CPU::x64::AMD::CPUID0x1 cpuid1amd;
|
||||||
CPU::x64::cpuid(0x1, &rax, &rbx, &rcx, &rdx);
|
#elif defined(__i386__)
|
||||||
if (rcx & CPU::x64::CPUID_FEAT_RCX_MONITOR)
|
CPU::x32::AMD::CPUID0x1 cpuid1amd;
|
||||||
|
#endif
|
||||||
|
#if defined(__amd64__) || defined(__i386__)
|
||||||
|
asmv("cpuid"
|
||||||
|
: "=a"(cpuid1amd.EAX.raw), "=b"(cpuid1amd.EBX.raw), "=c"(cpuid1amd.ECX.raw), "=d"(cpuid1amd.EDX.raw)
|
||||||
|
: "a"(0x1));
|
||||||
|
#endif
|
||||||
|
MONITORSupported = cpuid1amd.ECX.MONITOR;
|
||||||
|
}
|
||||||
|
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
|
||||||
|
{
|
||||||
|
#if defined(__amd64__)
|
||||||
|
CPU::x64::Intel::CPUID0x1 cpuid1intel;
|
||||||
|
#elif defined(__i386__)
|
||||||
|
CPU::x32::Intel::CPUID0x1 cpuid1intel;
|
||||||
|
#endif
|
||||||
|
#if defined(__amd64__) || defined(__i386__)
|
||||||
|
asmv("cpuid"
|
||||||
|
: "=a"(cpuid1intel.EAX.raw), "=b"(cpuid1intel.EBX.raw), "=c"(cpuid1intel.ECX.raw), "=d"(cpuid1intel.EDX.raw)
|
||||||
|
: "a"(0x1));
|
||||||
|
#endif
|
||||||
|
MONITORSupported = cpuid1intel.ECX.MONITOR;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (MONITORSupported)
|
||||||
{
|
{
|
||||||
trace("CPU has MONITOR/MWAIT support.");
|
trace("CPU has MONITOR/MWAIT support.");
|
||||||
}
|
}
|
||||||
@ -1182,7 +1209,7 @@ namespace Tasking
|
|||||||
error("Interrupts are not enabled.");
|
error("Interrupts are not enabled.");
|
||||||
CPU::Interrupts(CPU::Enable);
|
CPU::Interrupts(CPU::Enable);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
TaskingLock.Unlock();
|
TaskingLock.Unlock();
|
||||||
IdleProcess = CreateProcess(nullptr, (char *)"Idle", TaskTrustLevel::Idle);
|
IdleProcess = CreateProcess(nullptr, (char *)"Idle", TaskTrustLevel::Idle);
|
||||||
for (int i = 0; i < SMP::CPUCores; i++)
|
for (int i = 0; i < SMP::CPUCores; i++)
|
||||||
|
@ -21,7 +21,7 @@ __constructor void TestRandom()
|
|||||||
#endif
|
#endif
|
||||||
RDRANDFlag = cpuid1amd.ECX.RDRAND;
|
RDRANDFlag = cpuid1amd.ECX.RDRAND;
|
||||||
}
|
}
|
||||||
if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
|
else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0)
|
||||||
{
|
{
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
CPU::x64::Intel::CPUID0x1 cpuid1intel;
|
CPU::x64::Intel::CPUID0x1 cpuid1intel;
|
||||||
|
565
include/cpu.hpp
565
include/cpu.hpp
@ -123,6 +123,29 @@ namespace CPU
|
|||||||
SIMD_SSSE3,
|
SIMD_SSSE3,
|
||||||
SIMD_SSE41,
|
SIMD_SSE41,
|
||||||
SIMD_SSE42,
|
SIMD_SSE42,
|
||||||
|
SIMD_AVX,
|
||||||
|
SIMD_AVX2,
|
||||||
|
SIMD_AVX512F,
|
||||||
|
SIMD_AVX512BW,
|
||||||
|
SIMD_AVX512CD,
|
||||||
|
SIMD_AVX512DQ,
|
||||||
|
SIMD_AVX512ER,
|
||||||
|
SIMD_AVX512IFMA,
|
||||||
|
SIMD_AVX512PF,
|
||||||
|
SIMD_AVX512VBMI,
|
||||||
|
SIMD_AVX512VL,
|
||||||
|
SIMD_AVX512VNNI,
|
||||||
|
SIMD_AVX512BITALG,
|
||||||
|
SIMD_AVX512VPOPCNTDQ,
|
||||||
|
SIMD_AVX512_4VNNIW,
|
||||||
|
SIMD_AVX512_4FMAPS,
|
||||||
|
SIMD_AVX512_VP2INTERSECT,
|
||||||
|
SIMD_AVX512_BF16,
|
||||||
|
SIMD_AVX512_VBMI2,
|
||||||
|
SIMD_AVX512_GFNI,
|
||||||
|
SIMD_AVX512_VAES,
|
||||||
|
SIMD_AVX512_VPCLMULQDQ,
|
||||||
|
SIMD_AVX512_VNNI,
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -147,7 +170,7 @@ namespace CPU
|
|||||||
char *Hypervisor();
|
char *Hypervisor();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Check SIMD support.
|
* @brief Check SIMD support. It will return the highest supported SIMD type.
|
||||||
*
|
*
|
||||||
* @return x86SIMDType
|
* @return x86SIMDType
|
||||||
*/
|
*/
|
||||||
@ -227,11 +250,25 @@ namespace CPU
|
|||||||
void *PageTable(void *PT = nullptr);
|
void *PageTable(void *PT = nullptr);
|
||||||
|
|
||||||
/** @brief To be used only once. */
|
/** @brief To be used only once. */
|
||||||
void InitializeFeatures();
|
void InitializeFeatures(long Core);
|
||||||
|
|
||||||
/** @brief Get CPU counter value. */
|
/** @brief Get CPU counter value. */
|
||||||
uintptr_t Counter();
|
uintptr_t Counter();
|
||||||
|
|
||||||
|
typedef int __v4si __attribute__((__vector_size__(16)));
|
||||||
|
|
||||||
|
typedef union
|
||||||
|
{
|
||||||
|
__v4si vector;
|
||||||
|
long long int i64[2];
|
||||||
|
int i32[4];
|
||||||
|
short i16[8];
|
||||||
|
char i8[16];
|
||||||
|
int __attribute__((__vector_size__(16))) m128i_i32;
|
||||||
|
short __attribute__((__vector_size__(16))) m128i_i16;
|
||||||
|
char __attribute__((__vector_size__(16))) m128i_i8;
|
||||||
|
} __m128i;
|
||||||
|
|
||||||
namespace MemBar
|
namespace MemBar
|
||||||
{
|
{
|
||||||
SafeFunction static inline void Barrier()
|
SafeFunction static inline void Barrier()
|
||||||
@ -552,6 +589,32 @@ namespace CPU
|
|||||||
uint32_t raw;
|
uint32_t raw;
|
||||||
} DR7;
|
} DR7;
|
||||||
|
|
||||||
|
struct FXState
|
||||||
|
{
|
||||||
|
/** @brief FPU control word */
|
||||||
|
uint16_t fcw;
|
||||||
|
/** @brief FPU status word */
|
||||||
|
uint16_t fsw;
|
||||||
|
/** @brief FPU tag words */
|
||||||
|
uint8_t ftw;
|
||||||
|
/** @brief Reserved (zero) */
|
||||||
|
uint8_t Reserved;
|
||||||
|
/** @brief FPU opcode */
|
||||||
|
uint16_t fop;
|
||||||
|
/** @brief FPU instruction pointer */
|
||||||
|
uint64_t rip;
|
||||||
|
/** @brief FPU data pointer */
|
||||||
|
uint64_t rdp;
|
||||||
|
/** @brief SSE control register */
|
||||||
|
uint32_t mxcsr;
|
||||||
|
/** @brief SSE control register mask */
|
||||||
|
uint32_t mxcsrmask;
|
||||||
|
/** @brief FPU registers (last 6 bytes reserved) */
|
||||||
|
uint8_t st[8][16];
|
||||||
|
/** @brief XMM registers */
|
||||||
|
uint8_t xmm[16][16];
|
||||||
|
} __attribute__((packed));
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief CPUID
|
* @brief CPUID
|
||||||
*
|
*
|
||||||
@ -2125,79 +2188,6 @@ namespace CPU
|
|||||||
|
|
||||||
namespace x64
|
namespace x64
|
||||||
{
|
{
|
||||||
enum CPUIDFeatures
|
|
||||||
{
|
|
||||||
CPUID_FEAT_RCX_SSE3 = 1 << 0,
|
|
||||||
CPUID_FEAT_RCX_PCLMULQDQ = 1 << 1,
|
|
||||||
CPUID_FEAT_RCX_DTES64 = 1 << 2,
|
|
||||||
CPUID_FEAT_RCX_MONITOR = 1 << 3,
|
|
||||||
CPUID_FEAT_RCX_DS_CPL = 1 << 4,
|
|
||||||
CPUID_FEAT_RCX_VMX = 1 << 5,
|
|
||||||
CPUID_FEAT_RCX_SMX = 1 << 6,
|
|
||||||
CPUID_FEAT_RCX_EST = 1 << 7,
|
|
||||||
CPUID_FEAT_RCX_TM2 = 1 << 8,
|
|
||||||
CPUID_FEAT_RCX_SSSE3 = 1 << 9,
|
|
||||||
CPUID_FEAT_RCX_CID = 1 << 10,
|
|
||||||
CPUID_FEAT_RCX_FMA = 1 << 12,
|
|
||||||
CPUID_FEAT_RCX_CX16 = 1 << 13,
|
|
||||||
CPUID_FEAT_RCX_ETPRD = 1 << 14,
|
|
||||||
CPUID_FEAT_RCX_PDCM = 1 << 15,
|
|
||||||
CPUID_FEAT_RCX_PCIDE = 1 << 17,
|
|
||||||
CPUID_FEAT_RCX_DCA = 1 << 18,
|
|
||||||
CPUID_FEAT_RCX_SSE4_1 = 1 << 19,
|
|
||||||
CPUID_FEAT_RCX_SSE4_2 = 1 << 20,
|
|
||||||
CPUID_FEAT_RCX_x2APIC = 1 << 21,
|
|
||||||
CPUID_FEAT_RCX_MOVBE = 1 << 22,
|
|
||||||
CPUID_FEAT_RCX_POPCNT = 1 << 23,
|
|
||||||
CPUID_FEAT_RCX_AES = 1 << 25,
|
|
||||||
CPUID_FEAT_RCX_XSAVE = 1 << 26,
|
|
||||||
CPUID_FEAT_RCX_OSXSAVE = 1 << 27,
|
|
||||||
CPUID_FEAT_RCX_AVX = 1 << 28,
|
|
||||||
CPUID_FEAT_RCX_F16C = 1 << 29,
|
|
||||||
CPUID_FEAT_RCX_RDRAND = 1 << 30,
|
|
||||||
|
|
||||||
CPUID_FEAT_RDX_FPU = 1 << 0,
|
|
||||||
CPUID_FEAT_RDX_VME = 1 << 1,
|
|
||||||
CPUID_FEAT_RDX_DE = 1 << 2,
|
|
||||||
CPUID_FEAT_RDX_PSE = 1 << 3,
|
|
||||||
CPUID_FEAT_RDX_TSC = 1 << 4,
|
|
||||||
CPUID_FEAT_RDX_MSR = 1 << 5,
|
|
||||||
CPUID_FEAT_RDX_PAE = 1 << 6,
|
|
||||||
CPUID_FEAT_RDX_MCE = 1 << 7,
|
|
||||||
CPUID_FEAT_RDX_CX8 = 1 << 8,
|
|
||||||
CPUID_FEAT_RDX_APIC = 1 << 9,
|
|
||||||
CPUID_FEAT_RDX_SEP = 1 << 11,
|
|
||||||
CPUID_FEAT_RDX_MTRR = 1 << 12,
|
|
||||||
CPUID_FEAT_RDX_PGE = 1 << 13,
|
|
||||||
CPUID_FEAT_RDX_MCA = 1 << 14,
|
|
||||||
CPUID_FEAT_RDX_CMOV = 1 << 15,
|
|
||||||
CPUID_FEAT_RDX_PAT = 1 << 16,
|
|
||||||
CPUID_FEAT_RDX_PSE36 = 1 << 17,
|
|
||||||
CPUID_FEAT_RDX_PSN = 1 << 18,
|
|
||||||
CPUID_FEAT_RDX_CLF = 1 << 19,
|
|
||||||
CPUID_FEAT_RDX_DTES = 1 << 21,
|
|
||||||
CPUID_FEAT_RDX_ACPI = 1 << 22,
|
|
||||||
CPUID_FEAT_RDX_MMX = 1 << 23,
|
|
||||||
CPUID_FEAT_RDX_FXSR = 1 << 24,
|
|
||||||
CPUID_FEAT_RDX_SSE = 1 << 25,
|
|
||||||
CPUID_FEAT_RDX_SSE2 = 1 << 26,
|
|
||||||
CPUID_FEAT_RDX_SS = 1 << 27,
|
|
||||||
CPUID_FEAT_RDX_HTT = 1 << 28,
|
|
||||||
CPUID_FEAT_RDX_TM1 = 1 << 29,
|
|
||||||
CPUID_FEAT_RDX_IA64 = 1 << 30,
|
|
||||||
CPUID_FEAT_RDX_PBE = 1 << 31,
|
|
||||||
|
|
||||||
// ? Not sure how to get it.
|
|
||||||
CPUID_FEAT_RDX_SMEP = 1 << 7,
|
|
||||||
CPUID_FEAT_RDX_UMIP = 1 << 2,
|
|
||||||
CPUID_FEAT_RDX_SYSCALL = 1 << 11,
|
|
||||||
CPUID_FEAT_XD = 1 << 20,
|
|
||||||
CPUID_FEAT_1GB_PAGE = 1 << 26,
|
|
||||||
CPUID_FEAT_RDTSCP = 1 << 27,
|
|
||||||
CPUID_FEAT_LONG_MODE = 1 << 29,
|
|
||||||
CPUID_FEAT_RDX_SMAP = (1 << 20)
|
|
||||||
};
|
|
||||||
|
|
||||||
enum MSRID
|
enum MSRID
|
||||||
{
|
{
|
||||||
MSR_MONITOR_FILTER_SIZE = 0x6,
|
MSR_MONITOR_FILTER_SIZE = 0x6,
|
||||||
@ -3110,6 +3100,41 @@ namespace CPU
|
|||||||
uint64_t raw;
|
uint64_t raw;
|
||||||
} CR8;
|
} CR8;
|
||||||
|
|
||||||
|
/**
 * @brief Extended Control Register 0 (XCR0) bit layout.
 *
 * Bit positions follow the XSAVE state-component numbering: bits 0-7 are
 * x87, SSE, AVX, the two MPX components and the three AVX-512 components.
 * Bit 8 is not an XCR0 state component, so PKRU lives at bit 9 (this matches
 * the osdev.org CPU_Registers_x86 table the original comment referred to).
 */
typedef union XCR0
{
    struct
    {
        /** @brief X87 FPU/MMX/SSE Support (must be 1) */
        uint64_t X87 : 1;
        /** @brief XSAVE support for MXCSR and XMM registers */
        uint64_t SSE : 1;
        /** @brief AVX support for YMM registers */
        uint64_t AVX : 1;
        /** @brief MPX support for BND registers */
        uint64_t BNDREG : 1;
        /** @brief MPX support for BNDCFGU and BNDSTATUS registers */
        uint64_t BNDCSR : 1;
        /** @brief AVX-512 support for opmask registers */
        uint64_t OpMask : 1;
        /** @brief AVX-512 enabled and XSAVE support for upper halves of lower ZMM registers */
        uint64_t ZMM_HI256 : 1;
        /** @brief AVX-512 enabled and XSAVE support for upper ZMM registers */
        uint64_t HI16_ZMM : 1;
        /** @brief Reserved (bit 8 is not defined in XCR0) */
        uint64_t Reserved2 : 1;
        /** @brief XSAVE support for PKRU register (bit 9) */
        uint64_t PKRU : 1;
        /** @brief Reserved (bits 10-61) */
        uint64_t Reserved0 : 52;
        /** @brief AMD lightweight profiling (bit 62) */
        uint64_t LWP : 1;
        /** @brief Reserved */
        uint64_t Reserved1 : 1;
    };
    uint64_t raw;
} XCR0;
|
||||||
|
|
||||||
typedef union EFER
|
typedef union EFER
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
@ -3233,6 +3258,32 @@ namespace CPU
|
|||||||
uint64_t raw;
|
uint64_t raw;
|
||||||
} SelectorErrorCode;
|
} SelectorErrorCode;
|
||||||
|
|
||||||
|
struct FXState
|
||||||
|
{
|
||||||
|
/** @brief FPU control word */
|
||||||
|
uint16_t fcw;
|
||||||
|
/** @brief FPU status word */
|
||||||
|
uint16_t fsw;
|
||||||
|
/** @brief FPU tag words */
|
||||||
|
uint8_t ftw;
|
||||||
|
/** @brief Reserved (zero) */
|
||||||
|
uint8_t Reserved;
|
||||||
|
/** @brief FPU opcode */
|
||||||
|
uint16_t fop;
|
||||||
|
/** @brief FPU instruction pointer */
|
||||||
|
uint64_t rip;
|
||||||
|
/** @brief FPU data pointer */
|
||||||
|
uint64_t rdp;
|
||||||
|
/** @brief SSE control register */
|
||||||
|
uint32_t mxcsr;
|
||||||
|
/** @brief SSE control register mask */
|
||||||
|
uint32_t mxcsrmask;
|
||||||
|
/** @brief FPU registers (last 6 bytes reserved) */
|
||||||
|
uint8_t st[8][16];
|
||||||
|
/** @brief XMM registers */
|
||||||
|
uint8_t xmm[16][16];
|
||||||
|
} __attribute__((packed));
|
||||||
|
|
||||||
SafeFunction static inline void lgdt(void *gdt)
|
SafeFunction static inline void lgdt(void *gdt)
|
||||||
{
|
{
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
@ -3375,6 +3426,18 @@ namespace CPU
|
|||||||
return (CR8){.raw = Result};
|
return (CR8){.raw = Result};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Read extended control register 0 (XCR0).
 *
 * XGETBV with ECX = 0 returns XCR0 in EDX:EAX. Both halves are captured so
 * that bits above 31 (e.g. LWP at bit 62) are not silently dropped.
 *
 * @return XCR0 The register value; all zero when not built for amd64.
 */
SafeFunction static inline XCR0 readxcr0()
{
    uint32_t Low = 0;
    uint32_t High = 0;
#if defined(__amd64__)
    asmv("xgetbv"
         : "=a"(Low), "=d"(High)
         : "c"(0));
#endif
    return (XCR0){.raw = (static_cast<uint64_t>(High) << 32) | Low};
}
|
||||||
|
|
||||||
SafeFunction static inline void writecr0(CR0 ControlRegister)
|
SafeFunction static inline void writecr0(CR0 ControlRegister)
|
||||||
{
|
{
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
@ -3425,6 +3488,16 @@ namespace CPU
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Write extended control register 0 (XCR0).
 *
 * XSETBV with ECX = 0 loads XCR0 from EDX:EAX, so the value is split into
 * its low and high 32-bit halves. EDX must be an explicit input: leaving it
 * unset would write arbitrary high bits, and setting a reserved bit faults.
 *
 * @param ControlRegister Value to load into XCR0.
 */
SafeFunction static inline void writexcr0(XCR0 ControlRegister)
{
#if defined(__amd64__)
    asmv("xsetbv"
         :
         : "a"(static_cast<uint32_t>(ControlRegister.raw)),
           "d"(static_cast<uint32_t>(ControlRegister.raw >> 32)),
           "c"(0));
#endif
}
|
||||||
|
|
||||||
SafeFunction static inline void fxsave(void *FXSaveArea)
|
SafeFunction static inline void fxsave(void *FXSaveArea)
|
||||||
{
|
{
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
@ -3836,6 +3909,320 @@ namespace CPU
|
|||||||
} EDX;
|
} EDX;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** @brief Extended feature flags enumeration */
|
||||||
|
struct CPUID0x7_0
|
||||||
|
{
|
||||||
|
union
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
uint64_t Reserved : 32;
|
||||||
|
};
|
||||||
|
uint64_t raw;
|
||||||
|
} EAX;
|
||||||
|
|
||||||
|
union
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
/** @brief Access to base of fs and gs */
|
||||||
|
uint64_t FSGSBase : 1;
|
||||||
|
/** @brief IA32_TSC_ADJUST MSR */
|
||||||
|
uint64_t IA32TSCAdjust : 1;
|
||||||
|
/** @brief Software Guard Extensions */
|
||||||
|
uint64_t SGX : 1;
|
||||||
|
/** @brief Bit Manipulation Instruction Set 1 */
|
||||||
|
uint64_t BMI1 : 1;
|
||||||
|
/** @brief TSX Hardware Lock Elision */
|
||||||
|
uint64_t HLE : 1;
|
||||||
|
/** @brief Advanced Vector Extensions 2 */
|
||||||
|
uint64_t AVX2 : 1;
|
||||||
|
/** @brief FDP_EXCPTN_ONLY */
|
||||||
|
uint64_t FDPExcptonOnly : 1;
|
||||||
|
/** @brief Supervisor Mode Execution Protection */
|
||||||
|
uint64_t SMEP : 1;
|
||||||
|
/** @brief Bit Manipulation Instruction Set 2 */
|
||||||
|
uint64_t BMI2 : 1;
|
||||||
|
/** @brief Enhanced REP MOVSB/STOSB */
|
||||||
|
uint64_t ERMS : 1;
|
||||||
|
/** @brief INVPCID */
|
||||||
|
uint64_t INVPCID : 1;
|
||||||
|
/** @brief RTM */
|
||||||
|
uint64_t RTM : 1;
|
||||||
|
/** @brief Intel Resource Director Monitoring */
|
||||||
|
uint64_t RDT_M : 1;
|
||||||
|
/** @brief Deprecates FPU CS and DS values */
|
||||||
|
uint64_t DeprecatesFPU : 1;
|
||||||
|
/** @brief Intel Memory Protection Extensions */
|
||||||
|
uint64_t MPX : 1;
|
||||||
|
/** @brief Intel Resource Director Allocation */
|
||||||
|
uint64_t RDT_A : 1;
|
||||||
|
/** @brief AVX-512 Foundation */
|
||||||
|
uint64_t AVX512F : 1;
|
||||||
|
/** @brief AVX-512 Doubleword and Quadword Instructions */
|
||||||
|
uint64_t AVX512DQ : 1;
|
||||||
|
/** @brief RDSEED */
|
||||||
|
uint64_t RDSEED : 1;
|
||||||
|
/** @brief Intel Multi-Precision Add-Carry Instruction Extensions */
|
||||||
|
uint64_t ADX : 1;
|
||||||
|
/** @brief Supervisor Mode Access Prevention */
|
||||||
|
uint64_t SMAP : 1;
|
||||||
|
/** @brief AVX-512 Integer Fused Multiply-Add Instructions */
|
||||||
|
uint64_t AVX512IFMA : 1;
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved : 1;
|
||||||
|
/** @brief CLFLUSHOPT */
|
||||||
|
uint64_t CLFLUSHOPT : 1;
|
||||||
|
/** @brief CLWB */
|
||||||
|
uint64_t CLWB : 1;
|
||||||
|
/** @brief Intel Processor Trace */
|
||||||
|
uint64_t IntelProcessorTrace : 1;
|
||||||
|
/** @brief AVX-512 Prefetch Instructions */
|
||||||
|
uint64_t AVX512PF : 1;
|
||||||
|
/** @brief AVX-512 Exponential and Reciprocal Instructions */
|
||||||
|
uint64_t AVX512ER : 1;
|
||||||
|
/** @brief AVX-512 Conflict Detection Instructions */
|
||||||
|
uint64_t AVX512CD : 1;
|
||||||
|
/** @brief SHA Extensions */
|
||||||
|
uint64_t SHA : 1;
|
||||||
|
/** @brief AVX-512 Byte and Word Instructions */
|
||||||
|
uint64_t AVX512BW : 1;
|
||||||
|
/** @brief AVX-512 Vector Length Extensions */
|
||||||
|
uint64_t AVX512VL : 1;
|
||||||
|
};
|
||||||
|
uint64_t raw;
|
||||||
|
} EBX;
|
||||||
|
|
||||||
|
/* CPUID leaf 7, sub-leaf 0, ECX feature bits (bit 0 first).
   ENQCMD occupies bit 29; without it SGX_LC and PKS would decode one bit too low. */
union
{
    struct
    {
        /** @brief PREFETCHWT1 */
        uint64_t PREFETCHWT1 : 1;
        /** @brief AVX-512 Vector Bit Manipulation Instructions */
        uint64_t AVX512VBMI : 1;
        /** @brief User Mode Instruction Prevention */
        uint64_t UMIP : 1;
        /** @brief Memory Protection Keys for User-mode pages */
        uint64_t PKU : 1;
        /** @brief PKU enabled by OS */
        uint64_t OSPKE : 1;
        /** @brief Timed pause and user-level monitor/wait */
        uint64_t WaitPKG : 1;
        /** @brief AVX-512 Vector Bit Manipulation Instructions 2 */
        uint64_t AVX512VBMI2 : 1;
        /** @brief Control flow enforcement (CET) shadow stack */
        uint64_t CET_SS : 1;
        /** @brief Galois Field instructions */
        uint64_t GFNI : 1;
        /** @brief Vector AES instruction set (VEX-256/EVEX) */
        uint64_t VAES : 1;
        /** @brief CLMUL instruction set (VEX-256/EVEX) */
        uint64_t VPCLMULQDQ : 1;
        /** @brief AVX-512 Vector Neural Network Instructions */
        uint64_t AVX512VNNI : 1;
        /** @brief AVX-512 Bit Algorithms Instructions */
        uint64_t AVX512BITALG : 1;
        /** @brief IA32_TME related MSRs */
        uint64_t TME : 1;
        /** @brief AVX-512 Vector Population Count Double and Quad-word */
        uint64_t AVX512VPOPCNTDQ : 1;
        /** @brief Reserved */
        uint64_t Reserved0 : 1;
        /** @brief 5-level paging (57 address bits) */
        uint64_t LA57 : 1;
        /** @brief The value of userspace MPX Address-Width Adjust used by the BNDLDX and BNDSTX Intel MPX instructions in 64-bit mode */
        uint64_t MAWAU : 5;
        /** @brief Read Processor ID and IA32_TSC_AUX */
        uint64_t RDPID : 1;
        /** @brief Key Locker */
        uint64_t KL : 1;
        /** @brief BUS_LOCK_DETECT */
        uint64_t BusLockDetect : 1;
        /** @brief Cache line demote */
        uint64_t CLDEMOTE : 1;
        /** @brief Reserved */
        uint64_t Reserved1 : 1;
        /** @brief MOVDIRI */
        uint64_t MOVDIRI : 1;
        /** @brief MOVDIR64B */
        uint64_t MOVDIR64B : 1;
        /** @brief Enqueue Stores (ENQCMD and ENQCMDS) */
        uint64_t ENQCMD : 1;
        /** @brief SGX Launch Configuration */
        uint64_t SGX_LC : 1;
        /** @brief Protection Keys for Supervisor-mode pages */
        uint64_t PKS : 1;
    };
    uint64_t raw;
} ECX;
|
||||||
|
|
||||||
|
union
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved0 : 2;
|
||||||
|
/** @brief AVX-512 4-register Neural Network Instructions */
|
||||||
|
uint64_t AVX512_4VNNIW : 1;
|
||||||
|
/** @brief AVX-512 4-register Multiply Accumulation Single Precision */
|
||||||
|
uint64_t AVX512_4FMAPS : 1;
|
||||||
|
/** @brief Fast Short REP MOVSB/STOSB */
|
||||||
|
uint64_t FSRM : 1;
|
||||||
|
/** @brief User Inter-Processor Interrupts */
|
||||||
|
uint64_t UINTR : 1;
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved1 : 2;
|
||||||
|
/** @brief AVX-512 VP2INTERSECT Doubleword and Quadword Instructions */
|
||||||
|
uint64_t AVX512_VP2INTERSECT : 1;
|
||||||
|
/** @brief Special Register Buffer Data Sampling Mitigations */
|
||||||
|
uint64_t SRBDS_CTRL : 1;
|
||||||
|
/** @brief VERW instruction clears CPU buffers */
|
||||||
|
uint64_t MC_CLEAR : 1;
|
||||||
|
/** @brief All TSX transactions are aborted */
|
||||||
|
uint64_t TSX_FORCE_ABORT : 1;
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved2 : 1;
|
||||||
|
/** @brief TSX_FORCE_ABORT MSR is available */
|
||||||
|
uint64_t TsxForceAbortMsr : 1;
|
||||||
|
/** @brief SERIALIZE */
|
||||||
|
uint64_t SERIALIZE : 1;
|
||||||
|
/** @brief Mixture of CPU types in processor topology */
|
||||||
|
uint64_t HYBRID : 1;
|
||||||
|
/** @brief TSXLDTRK */
|
||||||
|
uint64_t TSXLDTRK : 1;
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved3 : 1;
|
||||||
|
/** @brief Platform configuration for Memory Encryption Technologies Instructions */
|
||||||
|
uint64_t PCONFIG : 1;
|
||||||
|
/** @brief Architectural Last Branch Records */
|
||||||
|
uint64_t LBR : 1;
|
||||||
|
/** @brief Control flow enforcement (CET) indirect branch tracking */
|
||||||
|
uint64_t CET_IBT : 1;
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved4 : 1;
|
||||||
|
/** @brief Tile computation on bfloat16 numbers */
|
||||||
|
uint64_t AMX_BF16 : 1;
|
||||||
|
/** @brief AVX512-FP16 half-precision floating-point instructions */
|
||||||
|
uint64_t AVX512_FP16 : 1;
|
||||||
|
/** @brief Tile architecture */
|
||||||
|
uint64_t AMX_TILE : 1;
|
||||||
|
/** @brief Tile computation on 8-bit integers */
|
||||||
|
uint64_t AMX_INT8 : 1;
|
||||||
|
/** @brief Speculation Control, part of Indirect Branch Control (IBC):
|
||||||
|
Indirect Branch Restricted Speculation (IBRS) and
|
||||||
|
Indirect Branch Prediction Barrier (IBPB) */
|
||||||
|
uint64_t SPEC_CTRL : 1;
|
||||||
|
/** @brief Single Thread Indirect Branch Predictor, part of IBC */
|
||||||
|
uint64_t STIBP : 1;
|
||||||
|
/** @brief IA32_FLUSH_CMD MSR */
|
||||||
|
uint64_t L1D_FLUSH : 1;
|
||||||
|
/** @brief IA32_ARCH_CAPABILITIES MSR (lists speculative side channel mitigations) */
|
||||||
|
uint64_t ArchCapabilities : 1;
|
||||||
|
/** @brief IA32_CORE_CAPABILITIES MSR (lists model-specific core capabilities) */
|
||||||
|
uint64_t CoreCapabilities : 1;
|
||||||
|
/** @brief Speculative Store Bypass Disable, as mitigation for Speculative Store Bypass (IA32_SPEC_CTRL) */
|
||||||
|
uint64_t SSBD : 1;
|
||||||
|
};
|
||||||
|
uint64_t raw;
|
||||||
|
} EDX;
|
||||||
|
};
|
||||||
|
|
||||||
|
/** @brief Extended feature flags enumeration */
|
||||||
|
struct CPUID0x7_1
|
||||||
|
{
|
||||||
|
union
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
uint64_t Reserved0 : 3;
|
||||||
|
/** @brief RAO-INT */
|
||||||
|
uint64_t RAO_INT : 1;
|
||||||
|
/** @brief AVX Vector Neural Network Instructions (VNNI) (VEX encoded) */
|
||||||
|
uint64_t AVX_VNNI : 1;
|
||||||
|
/** @brief AVX-512 instructions for bfloat16 numbers */
|
||||||
|
uint64_t AVX512_BF16 : 1;
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved1 : 1;
|
||||||
|
/** @brief CMPccXADD */
|
||||||
|
uint64_t CMPCCXADD : 1;
|
||||||
|
/** @brief Architectural Performance Monitoring Extended Leaf (EAX=23h) */
|
||||||
|
uint64_t ARCHPERFMONEXT : 1;
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved2 : 1;
|
||||||
|
/** @brief Fast zero-length MOVSB */
|
||||||
|
uint64_t FAST_ZERO_REP_MOVSB : 1;
|
||||||
|
/** @brief Fast short REP STOSB */
|
||||||
|
uint64_t FAST_SHORT_REP_STOSB : 1;
|
||||||
|
/** @brief Fast short REP CMPSB and SCASB */
|
||||||
|
uint64_t FAST_SHORT_REP_CMPSB_SCASB : 1;
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved3 : 4;
|
||||||
|
/** @brief Flexible Return and Event Delivery */
|
||||||
|
uint64_t FRED : 1;
|
||||||
|
/** @brief LKGS Instruction */
|
||||||
|
uint64_t LKGS : 1;
|
||||||
|
/** @brief WRMSRNS instruction */
|
||||||
|
uint64_t WRMSRNS : 1;
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved4 : 1;
|
||||||
|
/** @brief AMX instructions for FP16 numbers */
|
||||||
|
uint64_t AMX_FP16 : 1;
|
||||||
|
/** @brief HRESET instruction, IA32_HRESET_ENABLE MSR, and Processor History Reset Leaf (EAX=20h) */
|
||||||
|
uint64_t HRESET : 1;
|
||||||
|
/** @brief AVX IFMA instructions */
|
||||||
|
uint64_t AVX_IFMA : 1;
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved5 : 2;
|
||||||
|
/** @brief Linear Address Masking */
|
||||||
|
uint64_t LAM : 1;
|
||||||
|
/** @brief RDMSRLIST and WRMSRLIST instructions, and the IA32_BARRIER MSR */
|
||||||
|
uint64_t MSRLIST : 1;
|
||||||
|
};
|
||||||
|
uint64_t raw;
|
||||||
|
} EAX;
|
||||||
|
|
||||||
|
union
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
/** @brief IA32_PPIN and IA32_PPIN_CTL MSRs */
|
||||||
|
uint64_t PPIN : 1;
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved : 31;
|
||||||
|
};
|
||||||
|
uint64_t raw;
|
||||||
|
} EBX;
|
||||||
|
|
||||||
|
union
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved : 32;
|
||||||
|
};
|
||||||
|
uint64_t raw;
|
||||||
|
} ECX;
|
||||||
|
|
||||||
|
union
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved0 : 4;
|
||||||
|
/** @brief AVX VNNI INT8 instructions */
|
||||||
|
uint64_t AVX_VNNI_INT8 : 1;
|
||||||
|
/** @brief AVX NE CONVERT instructions */
|
||||||
|
uint64_t AVX_NE_CONVERT : 1;
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved1 : 8;
|
||||||
|
/** @brief PREFETCHIT0 and PREFETCHIT1 instructions */
|
||||||
|
uint64_t PREFETCHIT : 1;
|
||||||
|
/** @brief Reserved */
|
||||||
|
uint64_t Reserved2 : 17;
|
||||||
|
};
|
||||||
|
uint64_t raw;
|
||||||
|
} EDX;
|
||||||
|
};
|
||||||
|
|
||||||
/** @brief Performance monitors */
|
/** @brief Performance monitors */
|
||||||
struct CPUID0xA
|
struct CPUID0xA
|
||||||
{
|
{
|
||||||
@ -4661,8 +5048,9 @@ namespace CPU
|
|||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint64_t BranchID : 16;
|
uint64_t BrandId : 16;
|
||||||
uint64_t Reserved0 : 16;
|
uint64_t Reserved0 : 12;
|
||||||
|
uint64_t PkgType : 4;
|
||||||
};
|
};
|
||||||
uint64_t raw;
|
uint64_t raw;
|
||||||
} EBX;
|
} EBX;
|
||||||
@ -4674,9 +5062,28 @@ namespace CPU
|
|||||||
uint64_t LAHF_SAHF : 1;
|
uint64_t LAHF_SAHF : 1;
|
||||||
uint64_t CmpLegacy : 1;
|
uint64_t CmpLegacy : 1;
|
||||||
uint64_t SVM : 1;
|
uint64_t SVM : 1;
|
||||||
uint64_t Reserved0 : 1;
|
uint64_t ExtApicSpace : 1;
|
||||||
uint64_t AltMovCr8 : 1;
|
uint64_t AltMovCr8 : 1;
|
||||||
uint64_t Reserved1 : 26;
|
uint64_t ABM : 1;
|
||||||
|
uint64_t SSE4A : 1;
|
||||||
|
uint64_t MisalignedSSE : 1;
|
||||||
|
uint64_t ThreeDNowPrefetch : 1;
|
||||||
|
uint64_t OSVW : 1;
|
||||||
|
uint64_t IBS : 1;
|
||||||
|
uint64_t XOP : 1;
|
||||||
|
uint64_t SKINIT : 1;
|
||||||
|
uint64_t WDT : 1;
|
||||||
|
uint64_t Reserved0 : 1;
|
||||||
|
uint64_t LWP : 1;
|
||||||
|
uint64_t FMA4 : 1;
|
||||||
|
uint64_t Reserved1 : 1;
|
||||||
|
uint64_t Reserved2 : 1;
|
||||||
|
uint64_t NodeID : 1;
|
||||||
|
uint64_t Reserved3 : 1;
|
||||||
|
uint64_t TBM : 1;
|
||||||
|
uint64_t TopologyExtensions : 1;
|
||||||
|
uint64_t Reserved4 : 9;
|
||||||
|
|
||||||
};
|
};
|
||||||
uint64_t raw;
|
uint64_t raw;
|
||||||
} ECX;
|
} ECX;
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
struct CPUArchData
|
struct CPUArchData
|
||||||
{
|
{
|
||||||
#if defined(__amd64__)
|
#if defined(__amd64__)
|
||||||
int stub;
|
CPU::x64::FXState *FPU;
|
||||||
/* TODO */
|
/* TODO */
|
||||||
#elif defined(__i386__)
|
#elif defined(__i386__)
|
||||||
#elif defined(__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
@ -39,7 +39,7 @@ struct CPUData
|
|||||||
Tasking::TCB *CurrentThread;
|
Tasking::TCB *CurrentThread;
|
||||||
|
|
||||||
/** @brief Architecture-specific data. */
|
/** @brief Architecture-specific data. */
|
||||||
CPUArchData *Data;
|
CPUArchData Data;
|
||||||
/** @brief Checksum. Used to verify the integrity of the data. Must be equal to CPU_DATA_CHECKSUM (0xC0FFEE). */
|
/** @brief Checksum. Used to verify the integrity of the data. Must be equal to CPU_DATA_CHECKSUM (0xC0FFEE). */
|
||||||
int Checksum;
|
int Checksum;
|
||||||
} __attribute__((packed));
|
} __attribute__((packed));
|
||||||
|
@ -57,32 +57,6 @@ namespace Tasking
|
|||||||
Terminated
|
Terminated
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FXState
|
|
||||||
{
|
|
||||||
/** @brief FPU control word */
|
|
||||||
uint16_t fcw;
|
|
||||||
/** @brief FPU status word */
|
|
||||||
uint16_t fsw;
|
|
||||||
/** @brief FPU tag words */
|
|
||||||
uint8_t ftw;
|
|
||||||
/** @brief Reserved (zero) */
|
|
||||||
uint8_t Reserved;
|
|
||||||
/** @brief FPU opcode */
|
|
||||||
uint16_t fop;
|
|
||||||
/** @brief FPU instruction pointer */
|
|
||||||
uint64_t rip;
|
|
||||||
/** @brief FPU data pointer */
|
|
||||||
uint64_t rdp;
|
|
||||||
/** @brief SSE control register */
|
|
||||||
uint32_t mxcsr;
|
|
||||||
/** @brief SSE control register mask */
|
|
||||||
uint32_t mxcsrmask;
|
|
||||||
/** @brief FPU registers (last 6 bytes reserved) */
|
|
||||||
uint8_t st[8][16];
|
|
||||||
/** @brief XMM registers */
|
|
||||||
uint8_t xmm[16][16];
|
|
||||||
} __attribute__((packed));
|
|
||||||
|
|
||||||
struct TaskSecurity
|
struct TaskSecurity
|
||||||
{
|
{
|
||||||
TaskTrustLevel TrustLevel;
|
TaskTrustLevel TrustLevel;
|
||||||
@ -130,7 +104,7 @@ namespace Tasking
|
|||||||
uintptr_t IPHistory[128];
|
uintptr_t IPHistory[128];
|
||||||
TaskSecurity Security;
|
TaskSecurity Security;
|
||||||
TaskInfo Info;
|
TaskInfo Info;
|
||||||
FXState *FPU;
|
CPU::x64::FXState *FPU;
|
||||||
|
|
||||||
void Rename(const char *name)
|
void Rename(const char *name)
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user