From 450182602530763541ae7193e41a70bfc68f1aef Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 3 Jan 2023 06:36:15 +0200 Subject: [PATCH] Kernel now uses SSE instructions properly --- ...dvancedProgrammableInterruptController.cpp | 41 +- .../amd64/cpu/SymmetricMultiprocessing.cpp | 2 +- Core/CPU.cpp | 263 +++++--- Core/Random.cpp | 6 +- GUI/GraphicalUserInterface.cpp | 34 -- Kernel.cpp | 8 +- Library/{Convert.c => Convert.cpp} | 530 +++++++--------- Library/memcpy.c | 320 ++++++++++ Tasking/Task.cpp | 39 +- Tests/RandomNumberGenerator.cpp | 2 +- include/cpu.hpp | 565 +++++++++++++++--- include/smp.hpp | 4 +- include/task.hpp | 28 +- 13 files changed, 1270 insertions(+), 572 deletions(-) rename Library/{Convert.c => Convert.cpp} (59%) create mode 100644 Library/memcpy.c diff --git a/Architecture/amd64/cpu/AdvancedProgrammableInterruptController.cpp b/Architecture/amd64/cpu/AdvancedProgrammableInterruptController.cpp index 55882ad..e71770e 100644 --- a/Architecture/amd64/cpu/AdvancedProgrammableInterruptController.cpp +++ b/Architecture/amd64/cpu/AdvancedProgrammableInterruptController.cpp @@ -237,13 +237,42 @@ namespace APIC this->APICBaseAddress = BaseLow << 12u | BaseHigh << 32u; trace("APIC Address: %#lx", this->APICBaseAddress); - uint32_t rcx; - cpuid(1, 0, 0, &rcx, 0); - if (rcx & CPUID_FEAT_RCX_x2APIC) + bool x2APICSupported = false; + if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0) { - // this->x2APICSupported = true; - warn("x2APIC not supported yet."); - // wrmsr(MSR_APIC_BASE, (rdmsr(MSR_APIC_BASE) | (1 << 11)) & ~(1 << 10)); +#if defined(__amd64__) + CPU::x64::AMD::CPUID0x1 cpuid1amd; +#elif defined(__i386__) + CPU::x32::AMD::CPUID0x1 cpuid1amd; +#endif +#if defined(__amd64__) || defined(__i386__) + asmv("cpuid" + : "=a"(cpuid1amd.EAX.raw), "=b"(cpuid1amd.EBX.raw), "=c"(cpuid1amd.ECX.raw), "=d"(cpuid1amd.EDX.raw) + : "a"(0x1)); +#endif + // FIXME: Not sure if I configured this correctly or something else is wrong + // x2APICSupported = cpuid1amd.ECX.x2APIC; + fixme("AMD does even support x2APIC? ECX->Reserved10: %#lx", cpuid1amd.ECX.Reserved10); + } + else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0) + { +#if defined(__amd64__) + CPU::x64::Intel::CPUID0x1 cpuid1intel; +#elif defined(__i386__) + CPU::x32::Intel::CPUID0x1 cpuid1intel; +#endif +#if defined(__amd64__) || defined(__i386__) + asmv("cpuid" + : "=a"(cpuid1intel.EAX.raw), "=b"(cpuid1intel.EBX.raw), "=c"(cpuid1intel.ECX.raw), "=d"(cpuid1intel.EDX.raw) + : "a"(0x1)); +#endif + x2APICSupported = cpuid1intel.ECX.x2APIC; + } + + if (x2APICSupported) + { + this->x2APICSupported = true; + wrmsr(MSR_APIC_BASE, (rdmsr(MSR_APIC_BASE) | (1 << 11)) & ~(1 << 10)); BaseStruct.EN = 1; wrmsr(MSR_APIC_BASE, BaseStruct.raw); } diff --git a/Architecture/amd64/cpu/SymmetricMultiprocessing.cpp b/Architecture/amd64/cpu/SymmetricMultiprocessing.cpp index 243ad56..02b968e 100644 --- a/Architecture/amd64/cpu/SymmetricMultiprocessing.cpp +++ b/Architecture/amd64/cpu/SymmetricMultiprocessing.cpp @@ -53,8 +53,8 @@ CPUData *GetCurrentCPU() extern "C" void StartCPU() { CPU::Interrupts(CPU::Disable); - CPU::InitializeFeatures(); uint64_t CoreID = (int)*reinterpret_cast(CORE); + CPU::InitializeFeatures(CoreID); // Initialize GDT and IDT Interrupts::Initialize(CoreID); Interrupts::Enable(CoreID); diff --git a/Core/CPU.cpp b/Core/CPU.cpp index 3473a5a..88bc791 100644 --- a/Core/CPU.cpp +++ b/Core/CPU.cpp @@ -3,26 +3,29 @@ #include #include #include +#include #include "../kernel.h" namespace CPU { + static bool SSEEnabled = false; + char *Vendor() { static char Vendor[13]; #if defined(__amd64__) - uint32_t rax, rbx, rcx, rdx; - x64::cpuid(0x0, &rax, &rbx, &rcx, &rdx); - memcpy(Vendor + 0, &rbx, 4); - memcpy(Vendor + 4, &rdx, 4); - memcpy(Vendor + 8, &rcx, 4); + uint32_t eax, ebx, ecx, edx; + x64::cpuid(0x0, &eax, &ebx, &ecx, &edx); + memcpy_unsafe(Vendor + 0, &ebx, 4); + memcpy_unsafe(Vendor + 4, &edx, 4); + memcpy_unsafe(Vendor + 8, &ecx, 4); #elif defined(__i386__) - uint32_t rax, rbx, rcx, rdx; - x32::cpuid(0x0, &rax, &rbx, &rcx, &rdx); - memcpy(Vendor + 0, &rbx, 4); - memcpy(Vendor + 4, &rdx, 4); - memcpy(Vendor + 8, &rcx, 4); + uint32_t eax, ebx, ecx, edx; + x32::cpuid(0x0, &eax, &ebx, &ecx, &edx); + memcpy_unsafe(Vendor + 0, &ebx, 4); + memcpy_unsafe(Vendor + 4, &edx, 4); + memcpy_unsafe(Vendor + 8, &ecx, 4); #elif defined(__aarch64__) asmv("mrs %0, MIDR_EL1" : "=r"(Vendor[0])); @@ -34,39 +37,39 @@ namespace CPU { static char Name[49]; #if defined(__amd64__) - uint32_t rax, rbx, rcx, rdx; - x64::cpuid(0x80000002, &rax, &rbx, &rcx, &rdx); - memcpy(Name + 0, &rax, 4); - memcpy(Name + 4, &rbx, 4); - memcpy(Name + 8, &rcx, 4); - memcpy(Name + 12, &rdx, 4); - x64::cpuid(0x80000003, &rax, &rbx, &rcx, &rdx); - memcpy(Name + 16, &rax, 4); - memcpy(Name + 20, &rbx, 4); - memcpy(Name + 24, &rcx, 4); - memcpy(Name + 28, &rdx, 4); - x64::cpuid(0x80000004, &rax, &rbx, &rcx, &rdx); - memcpy(Name + 32, &rax, 4); - memcpy(Name + 36, &rbx, 4); - memcpy(Name + 40, &rcx, 4); - memcpy(Name + 44, &rdx, 4); + uint32_t eax, ebx, ecx, edx; + x64::cpuid(0x80000002, &eax, &ebx, &ecx, &edx); + memcpy_unsafe(Name + 0, &eax, 4); + memcpy_unsafe(Name + 4, &ebx, 4); + memcpy_unsafe(Name + 8, &ecx, 4); + memcpy_unsafe(Name + 12, &edx, 4); + x64::cpuid(0x80000003, &eax, &ebx, &ecx, &edx); + memcpy_unsafe(Name + 16, &eax, 4); + memcpy_unsafe(Name + 20, &ebx, 4); + memcpy_unsafe(Name + 24, &ecx, 4); + memcpy_unsafe(Name + 28, &edx, 4); + x64::cpuid(0x80000004, &eax, &ebx, &ecx, &edx); + memcpy_unsafe(Name + 32, &eax, 4); + memcpy_unsafe(Name + 36, &ebx, 4); + memcpy_unsafe(Name + 40, &ecx, 4); + memcpy_unsafe(Name + 44, &edx, 4); #elif defined(__i386__) - uint32_t rax, rbx, rcx, rdx; - x32::cpuid(0x80000002, &rax, &rbx, &rcx, &rdx); - memcpy(Name + 0, &rax, 4); - memcpy(Name + 4, &rbx, 4); - memcpy(Name + 8, &rcx, 4); - memcpy(Name + 12, &rdx, 4); - x32::cpuid(0x80000003, &rax, &rbx, &rcx, &rdx); - memcpy(Name + 16, &rax, 4); - memcpy(Name + 20, &rbx, 4); - memcpy(Name + 24, &rcx, 4); - memcpy(Name + 28, &rdx, 4); - x32::cpuid(0x80000004, &rax, &rbx, &rcx, &rdx); - memcpy(Name + 32, &rax, 4); - memcpy(Name + 36, &rbx, 4); - memcpy(Name + 40, &rcx, 4); - memcpy(Name + 44, &rdx, 4); + uint32_t eax, ebx, ecx, edx; + x32::cpuid(0x80000002, &eax, &ebx, &ecx, &edx); + memcpy_unsafe(Name + 0, &eax, 4); + memcpy_unsafe(Name + 4, &ebx, 4); + memcpy_unsafe(Name + 8, &ecx, 4); + memcpy_unsafe(Name + 12, &edx, 4); + x32::cpuid(0x80000003, &eax, &ebx, &ecx, &edx); + memcpy_unsafe(Name + 16, &eax, 4); + memcpy_unsafe(Name + 20, &ebx, 4); + memcpy_unsafe(Name + 24, &ecx, 4); + memcpy_unsafe(Name + 28, &edx, 4); + x32::cpuid(0x80000004, &eax, &ebx, &ecx, &edx); + memcpy_unsafe(Name + 32, &eax, 4); + memcpy_unsafe(Name + 36, &ebx, 4); + memcpy_unsafe(Name + 40, &ecx, 4); + memcpy_unsafe(Name + 44, &edx, 4); #elif defined(__aarch64__) asmv("mrs %0, MIDR_EL1" : "=r"(Name[0])); @@ -78,17 +81,17 @@ namespace CPU { static char Hypervisor[13]; #if defined(__amd64__) - uint32_t rax, rbx, rcx, rdx; - x64::cpuid(0x40000000, &rax, &rbx, &rcx, &rdx); - memcpy(Hypervisor + 0, &rbx, 4); - memcpy(Hypervisor + 4, &rcx, 4); - memcpy(Hypervisor + 8, &rdx, 4); + uint32_t eax, ebx, ecx, edx; + x64::cpuid(0x40000000, &eax, &ebx, &ecx, &edx); + memcpy_unsafe(Hypervisor + 0, &ebx, 4); + memcpy_unsafe(Hypervisor + 4, &ecx, 4); + memcpy_unsafe(Hypervisor + 8, &edx, 4); #elif defined(__i386__) - uint32_t rax, rbx, rcx, rdx; - x64::cpuid(0x40000000, &rax, &rbx, &rcx, &rdx); - memcpy(Hypervisor + 0, &rbx, 4); - memcpy(Hypervisor + 4, &rcx, 4); - memcpy(Hypervisor + 8, &rdx, 4); + uint32_t eax, ebx, ecx, edx; + x64::cpuid(0x40000000, &eax, &ebx, &ecx, &edx); + memcpy_unsafe(Hypervisor + 0, &ebx, 4); + memcpy_unsafe(Hypervisor + 4, &ecx, 4); + memcpy_unsafe(Hypervisor + 8, &edx, 4); #elif defined(__aarch64__) asmv("mrs %0, MIDR_EL1" : "=r"(Hypervisor[0])); @@ -171,15 +174,51 @@ namespace CPU return PT; } - void InitializeFeatures() + void InitializeFeatures(long Core) { + bool PGESupport = false; + bool SSESupport = false; #if defined(__amd64__) static int BSP = 0; x64::CR0 cr0 = x64::readcr0(); x64::CR4 cr4 = x64::readcr4(); - uint32_t rax, rbx, rcx, rdx; - x64::cpuid(0x1, &rax, &rbx, &rcx, &rdx); - if (rdx & x64::CPUID_FEAT_RDX_PGE) + + if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0) + { +#if defined(__amd64__) + CPU::x64::AMD::CPUID0x1 cpuid1amd; +#elif defined(__i386__) + CPU::x32::AMD::CPUID0x1 cpuid1amd; +#endif +#if defined(__amd64__) || defined(__i386__) + asmv("cpuid" + : "=a"(cpuid1amd.EAX.raw), "=b"(cpuid1amd.EBX.raw), "=c"(cpuid1amd.ECX.raw), "=d"(cpuid1amd.EDX.raw) + : "a"(0x1)); +#endif + if (cpuid1amd.EDX.PGE) + PGESupport = true; + if (cpuid1amd.EDX.SSE) + SSESupport = true; + } + else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0) + { +#if defined(__amd64__) + CPU::x64::Intel::CPUID0x1 cpuid1intel; +#elif defined(__i386__) + CPU::x32::Intel::CPUID0x1 cpuid1intel; +#endif +#if defined(__amd64__) || defined(__i386__) + asmv("cpuid" + : "=a"(cpuid1intel.EAX.raw), "=b"(cpuid1intel.EBX.raw), "=c"(cpuid1intel.ECX.raw), "=d"(cpuid1intel.EDX.raw) + : "a"(0x1)); +#endif + if (cpuid1intel.EDX.PGE) + PGESupport = true; + if (cpuid1intel.EDX.SSE) + SSESupport = true; + } + + if (PGESupport) { debug("Enabling global pages support..."); if (!BSP) @@ -187,16 +226,29 @@ namespace CPU cr4.PGE = 1; } - if (rdx & x64::CPUID_FEAT_RDX_SSE) - { - debug("Enabling SSE support..."); - if (!BSP) - KPrint("SSE is supported."); - cr0.EM = 0; - cr0.MP = 1; - cr4.OSFXSR = 1; - cr4.OSXMMEXCPT = 1; - } + bool SSEEnableAfter = false; + + if (strcmp(CPU::Hypervisor(), x86_CPUID_VENDOR_TCG) != 0) /* Not sure if my code is not working properly or something else is the issue. */ + if (SSESupport) + { + debug("Enabling SSE support..."); + if (!BSP) + KPrint("SSE is supported."); + cr0.EM = 0; + cr0.MP = 1; + cr4.OSFXSR = 1; + cr4.OSXMMEXCPT = 1; + + CPUData *CoreData = GetCPU(Core); + CoreData->Data.FPU = (CPU::x64::FXState *)KernelAllocator.RequestPages(TO_PAGES(sizeof(CPU::x64::FXState))); + memset(CoreData->Data.FPU, 0, FROM_PAGES(TO_PAGES(sizeof(CPU::x64::FXState)))); + CoreData->Data.FPU->mxcsr = 0b0001111110000000; + CoreData->Data.FPU->mxcsrmask = 0b1111111110111111; + CoreData->Data.FPU->fcw = 0b0000001100111111; + CPU::x64::fxrstor(CoreData->Data.FPU); + + SSEEnableAfter = true; + } if (!BSP) KPrint("Enabling CPU cache."); @@ -207,24 +259,26 @@ namespace CPU x64::writecr0(cr0); + // FIXME: I don't think this is reporting correctly. This has to be fixed asap. debug("Enabling UMIP, SMEP & SMAP support..."); - x64::cpuid(0x1, &rax, &rbx, &rcx, &rdx); - if (rdx & x64::CPUID_FEAT_RDX_UMIP) // https://en.wikipedia.org/wiki/Control_register + uint32_t eax, ebx, ecx, edx; + x64::cpuid(0x1, &eax, &ebx, &ecx, &edx); + if (edx & (1 << 2)) // https://en.wikipedia.org/wiki/Control_register { if (!BSP) KPrint("UMIP is supported."); debug("UMIP is supported."); // cr4.UMIP = 1; } - if (rdx & x64::CPUID_FEAT_RDX_SMEP) // https://en.wikipedia.org/wiki/Control_register#SMEP - // https://web.archive.org/web/20160312223150/http://ncsi.com/nsatc11/presentations/wednesday/emerging_technologies/fischer.pdf + if (edx & (1 << 7)) // https://en.wikipedia.org/wiki/Control_register#SMEP + // https://web.archive.org/web/20160312223150/http://ncsi.com/nsatc11/presentations/wednesday/emerging_technologies/fischer.pdf { if (!BSP) KPrint("SMEP is supported."); debug("SMEP is supported."); // cr4.SMEP = 1; } - if (rdx & x64::CPUID_FEAT_RDX_SMAP) // https://en.wikipedia.org/wiki/Supervisor_Mode_Access_Prevention + if (edx & (1 << 20)) // https://en.wikipedia.org/wiki/Supervisor_Mode_Access_Prevention { if (!BSP) KPrint("SMAP is supported."); @@ -252,6 +306,8 @@ namespace CPU x64::wrmsr(x64::MSR_CR_PAT, 0x6 | (0x0 << 8) | (0x1 << 16)); if (!BSP++) trace("Features for BSP initialized."); + if (SSEEnableAfter) + SSEEnabled = true; #elif defined(__i386__) #elif defined(__aarch64__) #endif @@ -276,6 +332,14 @@ namespace CPU x86SIMDType CheckSIMD() { + if (unlikely(!SSEEnabled)) + return SIMD_NONE; + + static x86SIMDType SIMDType = SIMD_NONE; + + if (likely(SIMDType != SIMD_NONE)) + return SIMDType; + if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0) { #if defined(__amd64__) @@ -289,17 +353,32 @@ namespace CPU : "a"(0x1)); #endif if (cpuid1amd.ECX.SSE4_2) - return SIMD_SSE42; + SIMDType = SIMD_SSE42; else if (cpuid1amd.ECX.SSE4_1) - return SIMD_SSE41; + SIMDType = SIMD_SSE41; else if (cpuid1amd.ECX.SSE3) - return SIMD_SSE3; + SIMDType = SIMD_SSE3; else if (cpuid1amd.EDX.SSE2) - return SIMD_SSE2; + SIMDType = SIMD_SSE2; else if (cpuid1amd.EDX.SSE) - return SIMD_SSE; + SIMDType = SIMD_SSE; + +#ifdef DEBUG + if (cpuid1amd.ECX.SSE4_2) + debug("SSE4.2 is supported."); + if (cpuid1amd.ECX.SSE4_1) + debug("SSE4.1 is supported."); + if (cpuid1amd.ECX.SSE3) + debug("SSE3 is supported."); + if (cpuid1amd.EDX.SSE2) + debug("SSE2 is supported."); + if (cpuid1amd.EDX.SSE) + debug("SSE is supported."); +#endif + + return SIMDType; } - if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0) + else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0) { #if defined(__amd64__) CPU::x64::Intel::CPUID0x1 cpuid1intel; @@ -312,15 +391,30 @@ namespace CPU : "a"(0x1)); #endif if (cpuid1intel.ECX.SSE4_2) - return SIMD_SSE42; + SIMDType = SIMD_SSE42; else if (cpuid1intel.ECX.SSE4_1) - return SIMD_SSE41; + SIMDType = SIMD_SSE41; else if (cpuid1intel.ECX.SSE3) - return SIMD_SSE3; + SIMDType = SIMD_SSE3; else if (cpuid1intel.EDX.SSE2) - return SIMD_SSE2; + SIMDType = SIMD_SSE2; else if (cpuid1intel.EDX.SSE) - return SIMD_SSE; + SIMDType = SIMD_SSE; + +#ifdef DEBUG + if (cpuid1intel.ECX.SSE4_2) + debug("SSE4.2 is supported."); + if (cpuid1intel.ECX.SSE4_1) + debug("SSE4.1 is supported."); + if (cpuid1intel.ECX.SSE3) + debug("SSE3 is supported."); + if (cpuid1intel.EDX.SSE2) + debug("SSE2 is supported."); + if (cpuid1intel.EDX.SSE) + debug("SSE is supported."); +#endif + + return SIMDType; } return SIMD_NONE; @@ -328,6 +422,9 @@ namespace CPU bool CheckSIMD(x86SIMDType Type) { + if (unlikely(!SSEEnabled)) + return false; + if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0) { #if defined(__amd64__) @@ -351,7 +448,7 @@ namespace CPU else if (Type == SIMD_SSE) return cpuid1amd.EDX.SSE; } - if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0) + else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0) { #if defined(__amd64__) CPU::x64::Intel::CPUID0x1 cpuid1intel; diff --git a/Core/Random.cpp b/Core/Random.cpp index 53621c0..c3f5b1b 100644 --- a/Core/Random.cpp +++ b/Core/Random.cpp @@ -22,7 +22,7 @@ namespace Random #endif RDRANDFlag = cpuid1amd.ECX.RDRAND; } - if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0) + else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0) { #if defined(__amd64__) CPU::x64::Intel::CPUID0x1 cpuid1intel; @@ -71,7 +71,7 @@ namespace Random #endif RDRANDFlag = cpuid1amd.ECX.RDRAND; } - if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0) + else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0) { #if defined(__amd64__) CPU::x64::Intel::CPUID0x1 cpuid1intel; @@ -120,7 +120,7 @@ namespace Random #endif RDRANDFlag = cpuid1amd.ECX.RDRAND; } - if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0) + else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0) { #if defined(__amd64__) CPU::x64::Intel::CPUID0x1 cpuid1intel; diff --git a/GUI/GraphicalUserInterface.cpp b/GUI/GraphicalUserInterface.cpp index f5622d1..ec5ff2c 100644 --- a/GUI/GraphicalUserInterface.cpp +++ b/GUI/GraphicalUserInterface.cpp @@ -129,24 +129,6 @@ namespace GraphicalUserInterface 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // }; - uint32_t DesktopFadeEffect[] = { - 0xFF000000, - 0x010101, - 0x040404, - 0x080808, - 0x101010, - 0x121212, - 0x151515, - 0x181818, - 0x1A1A1A, - 0x1D1D1D, - 0x1F1F1F, - 0x222222, - 0x242424, - 0x262626, - 0x282828, - }; - char CloseButton[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // @@ -262,22 +244,6 @@ namespace GraphicalUserInterface 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // }; - uint32_t CloseButtonFade[] = { - 0x404040, - 0x770000, - 0x990000, - 0xBB0000, - 0xDD0000, - 0xFF0000, - }; - - uint32_t MaximizeMinimizeButtonFade[] = { - 0x404040, - 0x454545, - 0x505050, - 0x5F5F5F, - }; - O1 void GUI::FetchInputs() { KernelCallback callback; diff --git a/Kernel.cpp b/Kernel.cpp index 251a1ba..e0ebb07 100644 --- a/Kernel.cpp +++ b/Kernel.cpp @@ -34,6 +34,12 @@ * CREDITS AND REFERENCES: * - General: * https://wiki.osdev.org/Main_Page + * + * - CPU XCR0 structure: + * https://wiki.osdev.org/CPU_Registers_x86#XCR0 + * + * - CPUID 0x7: + * https://en.wikipedia.org/wiki/CPUID * * - Network: * https://web.archive.org/web/20051210132103/http://users.pcnet.ro/dmoroian/beej/Beej.html @@ -123,7 +129,7 @@ EXTERNC __no_instrument_function void Main(BootInfo *Info) KPrint("Initializing GDT and IDT"); Interrupts::Initialize(0); KPrint("Initializing CPU Features"); - CPU::InitializeFeatures(); + CPU::InitializeFeatures(0); KPrint("Loading Kernel Symbols"); KernelSymbolTable = new SymbolResolver::Symbols((uintptr_t)Info->Kernel.FileBase); KPrint("Reading Kernel Parameters"); diff --git a/Library/Convert.c b/Library/Convert.cpp similarity index 59% rename from Library/Convert.c rename to Library/Convert.cpp index a4dd33e..f6e4dff 100644 --- a/Library/Convert.c +++ b/Library/Convert.cpp @@ -3,346 +3,191 @@ #include #include #include +#include -// TODO: Replace mem* with assembly code - -/* Some of the functions are from musl library */ -/* https://www.musl-libc.org/ */ -/* -Copyright © 2005-2020 Rich Felker, et al. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -void *memcpy_unsafe(void *dest, const void *src, size_t n) +EXTERNC void *memcpy_sse(void *dest, const void *src, size_t n) { - unsigned char *d = dest; - const unsigned char *s = src; + char *d = (char *)dest; + const char *s = (const char *)src; -#ifdef __GNUC__ - -#if __BYTE_ORDER == __LITTLE_ENDIAN -#define LS >> -#define RS << -#else -#define LS << -#define RS >> -#endif - - typedef uint32_t __attribute__((__may_alias__)) u32; - uint32_t w, x; - - for (; (uintptr_t)s % 4 && n; n--) - *d++ = *s++; - - if ((uintptr_t)d % 4 == 0) + if ((((uintptr_t)d | (uintptr_t)s) & 0xF) == 0) { - for (; n >= 16; s += 16, d += 16, n -= 16) + size_t num_vectors = n / 16; + for (size_t i = 0; i < num_vectors; i++) { - *(u32 *)(d + 0) = *(u32 *)(s + 0); - *(u32 *)(d + 4) = *(u32 *)(s + 4); - *(u32 *)(d + 8) = *(u32 *)(s + 8); - *(u32 *)(d + 12) = *(u32 *)(s + 12); + asmv("movaps (%0), %%xmm0\n" + "movaps %%xmm0, (%1)\n" + : + : "r"(s), "r"(d) + : "xmm0"); + d += 16; + s += 16; } - if (n & 8) + + n -= num_vectors * 16; + } + + memcpy_unsafe(d, s, n); + return dest; +} + +EXTERNC void *memcpy_sse2(void *dest, const void *src, size_t n) +{ + char *d = (char *)dest; + const char *s = (const char *)src; + + if ((((uintptr_t)d | (uintptr_t)s) & 0xF) == 0) + { + size_t num_vectors = n / 16; + for (size_t i = 0; i < num_vectors; i++) { - *(u32 *)(d + 0) = *(u32 *)(s + 0); - *(u32 *)(d + 4) = *(u32 *)(s + 4); + asmv("movdqa (%0), %%xmm0\n" + "movdqa %%xmm0, (%1)\n" + : + : "r"(s), "r"(d) + : "xmm0"); + d += 16; + s += 16; + } + + n -= num_vectors * 16; + } + + memcpy_unsafe(d, s, n); + return dest; +} + +EXTERNC void *memcpy_sse3(void *dest, const void *src, size_t n) +{ + char *d = (char *)dest; + const char *s = (const char *)src; + + if ((((uintptr_t)d | (uintptr_t)s) & 0x7) == 0) + { + size_t num_vectors = n / 8; + for (size_t i = 0; i < num_vectors; i++) + { + asmv("movq (%0), %%xmm0\n" + "movddup %%xmm0, %%xmm1\n" + "movq %%xmm1, (%1)\n" + : + : "r"(s), "r"(d) + : "xmm0", "xmm1"); d += 8; s += 8; } - if (n & 4) - { - *(u32 *)(d + 0) = *(u32 *)(s + 0); - d += 4; - s += 4; - } - if (n & 2) - { - *d++ = *s++; - *d++ = *s++; - } - if (n & 1) - { - *d = *s; - } - return dest; + + n -= num_vectors * 8; } - if (n >= 32) - switch ((uintptr_t)d % 4) - { - case 1: - w = *(u32 *)s; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - n -= 3; - for (; n >= 17; s += 16, d += 16, n -= 16) - { - x = *(u32 *)(s + 1); - *(u32 *)(d + 0) = (w LS 24) | (x RS 8); - w = *(u32 *)(s + 5); - *(u32 *)(d + 4) = (x LS 24) | (w RS 8); - x = *(u32 *)(s + 9); - *(u32 *)(d + 8) = (w LS 24) | (x RS 8); - w = *(u32 *)(s + 13); - *(u32 *)(d + 12) = (x LS 24) | (w RS 8); - } - break; - case 2: - w = *(u32 *)s; - *d++ = *s++; - *d++ = *s++; - n -= 2; - for (; n >= 18; s += 16, d += 16, n -= 16) - { - x = *(u32 *)(s + 2); - *(u32 *)(d + 0) = (w LS 16) | (x RS 16); - w = *(u32 *)(s + 6); - *(u32 *)(d + 4) = (x LS 16) | (w RS 16); - x = *(u32 *)(s + 10); - *(u32 *)(d + 8) = (w LS 16) | (x RS 16); - w = *(u32 *)(s + 14); - *(u32 *)(d + 12) = (x LS 16) | (w RS 16); - } - break; - case 3: - w = *(u32 *)s; - *d++ = *s++; - n -= 1; - for (; n >= 19; s += 16, d += 16, n -= 16) - { - x = *(u32 *)(s + 3); - *(u32 *)(d + 0) = (w LS 8) | (x RS 24); - w = *(u32 *)(s + 7); - *(u32 *)(d + 4) = (x LS 8) | (w RS 24); - x = *(u32 *)(s + 11); - *(u32 *)(d + 8) = (w LS 8) | (x RS 24); - w = *(u32 *)(s + 15); - *(u32 *)(d + 12) = (x LS 8) | (w RS 24); - } - break; - } - if (n & 16) - { - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - } - if (n & 8) - { - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - } - if (n & 4) - { - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - } - if (n & 2) - { - *d++ = *s++; - *d++ = *s++; - } - if (n & 1) - { - *d = *s; - } - return dest; -#endif - - for (; n; n--) - *d++ = *s++; + memcpy_unsafe(d, s, n); return dest; } -void *memset_unsafe(void *dest, int c, size_t n) +EXTERNC void *memcpy_ssse3(void *dest, const void *src, size_t n) { - unsigned char *s = dest; - size_t k; + char *d = (char *)dest; + const char *s = (const char *)src; - if (!n) - return dest; - s[0] = c; - s[n - 1] = c; - if (n <= 2) - return dest; - s[1] = c; - s[2] = c; - s[n - 2] = c; - s[n - 3] = c; - if (n <= 6) - return dest; - s[3] = c; - s[n - 4] = c; - if (n <= 8) - return dest; - - k = -(uintptr_t)s & 3; - s += k; - n -= k; - n &= -4; - -#ifdef __GNUC__ - typedef uint32_t __attribute__((__may_alias__)) u32; - typedef uint64_t __attribute__((__may_alias__)) u64; - - u32 c32 = ((u32)-1) / 255 * (unsigned char)c; - *(u32 *)(s + 0) = c32; - *(u32 *)(s + n - 4) = c32; - if (n <= 8) - return dest; - *(u32 *)(s + 4) = c32; - *(u32 *)(s + 8) = c32; - *(u32 *)(s + n - 12) = c32; - *(u32 *)(s + n - 8) = c32; - if (n <= 24) - return dest; - *(u32 *)(s + 12) = c32; - *(u32 *)(s + 16) = c32; - *(u32 *)(s + 20) = c32; - *(u32 *)(s + 24) = c32; - *(u32 *)(s + n - 28) = c32; - *(u32 *)(s + n - 24) = c32; - *(u32 *)(s + n - 20) = c32; - *(u32 *)(s + n - 16) = c32; - - k = 24 + ((uintptr_t)s & 4); - s += k; - n -= k; - - u64 c64 = c32 | ((u64)c32 << 32); - for (; n >= 32; n -= 32, s += 32) + if ((((uintptr_t)d | (uintptr_t)s) & 0xF) == 0) { - *(u64 *)(s + 0) = c64; - *(u64 *)(s + 8) = c64; - *(u64 *)(s + 16) = c64; - *(u64 *)(s + 24) = c64; - } -#else - for (; n; n--, s++) - *s = c; -#endif + size_t num_vectors = n / 16; + for (size_t i = 0; i < num_vectors; i++) + { + asmv("movdqa (%0), %%xmm0\n" + "movdqa 16(%0), %%xmm1\n" + "palignr $8, %%xmm0, %%xmm1\n" + "movdqa %%xmm1, (%1)\n" + : + : "r"(s), "r"(d) + : "xmm0", "xmm1"); + d += 16; + s += 16; + } + n -= num_vectors * 16; + } + + memcpy_unsafe(d, s, n); return dest; } -void *memmove_unsafe(void *dest, const void *src, size_t n) +EXTERNC void *memcpy_sse4_1(void *dest, const void *src, size_t n) { -#ifdef __GNUC__ - typedef __attribute__((__may_alias__)) size_t WT; -#define WS (sizeof(WT)) -#endif + CPU::__m128i *d = (CPU::__m128i *)dest; + const CPU::__m128i *s = (const CPU::__m128i *)src; - char *d = dest; - const char *s = src; - - if (d == s) - return d; - if ((uintptr_t)s - (uintptr_t)d - n <= -2 * n) - return memcpy(d, s, n); - - if (d < s) + if ((((uintptr_t)d | (uintptr_t)s) & 0xF) == 0) { -#ifdef __GNUC__ - if ((uintptr_t)s % WS == (uintptr_t)d % WS) + size_t num_vectors = n / 16; + for (size_t i = 0; i < num_vectors; i++) { - while ((uintptr_t)d % WS) - { - if (!n--) - return dest; - *d++ = *s++; - } - for (; n >= WS; n -= WS, d += WS, s += WS) - *(WT *)d = *(WT *)s; + // movntdqa + asmv("movdqa (%0), %%xmm0\n" + "movdqa %%xmm0, (%1)\n" + : + : "r"(s), "r"(d) + : "xmm0"); + d += 16; + s += 16; } -#endif - for (; n; n--) - *d++ = *s++; - } - else - { -#ifdef __GNUC__ - if ((uintptr_t)s % WS == (uintptr_t)d % WS) - { - while ((uintptr_t)(d + n) % WS) - { - if (!n--) - return dest; - d[n] = s[n]; - } - while (n >= WS) - n -= WS, *(WT *)(d + n) = *(WT *)(s + n); - } -#endif - while (n) - n--, d[n] = s[n]; + + n -= num_vectors * 16; } + memcpy_unsafe(d, s, n); return dest; } -int memcmp(const void *vl, const void *vr, size_t n) +EXTERNC void *memcpy_sse4_2(void *dest, const void *src, size_t n) { - const unsigned char *l = vl, *r = vr; + char *d = (char *)dest; + const char *s = (const char *)src; + + if ((((uintptr_t)d | (uintptr_t)s) & 0xF) == 0) + { + size_t num_vectors = n / 16; + for (size_t i = 0; i < num_vectors; i++) + { + asmv("movdqa (%0), %%xmm0\n" + "pcmpistri $0, (%0), %%xmm0\n" + "movdqa %%xmm0, (%1)\n" + : + : "r"(s), "r"(d) + : "xmm0"); + d += 16; + s += 16; + } + + n -= num_vectors * 16; + } + + memcpy_unsafe(d, s, n); + return dest; +} + +EXTERNC int memcmp(const void *vl, const void *vr, size_t n) +{ + const unsigned char *l = (unsigned char *)vl, *r = (unsigned char *)vr; for (; n && *l == *r; n--, l++, r++) ; return n ? *l - *r : 0; } -void backspace(char s[]) +EXTERNC void backspace(char s[]) { int len = strlen(s); s[len - 1] = '\0'; } -void append(char s[], char n) +EXTERNC void append(char s[], char n) { int len = strlen(s); s[len] = n; s[len + 1] = '\0'; } -int strncmp(const char *s1, const char *s2, size_t n) +EXTERNC int strncmp(const char *s1, const char *s2, size_t n) { for (size_t i = 0; i < n; i++) { @@ -355,7 +200,7 @@ int strncmp(const char *s1, const char *s2, size_t n) return 0; } -long unsigned strlen(const char s[]) +EXTERNC long unsigned strlen(const char s[]) { long unsigned i = 0; if (s) @@ -364,7 +209,7 @@ long unsigned strlen(const char s[]) return i; } -char *strcat_unsafe(char *destination, const char *source) +EXTERNC char *strcat_unsafe(char *destination, const char *source) { if ((destination == NULL) || (source == NULL)) return NULL; @@ -381,7 +226,7 @@ char *strcat_unsafe(char *destination, const char *source) return destination; } -char *strcpy_unsafe(char *destination, const char *source) +EXTERNC char *strcpy_unsafe(char *destination, const char *source) { if (destination == NULL) return NULL; @@ -396,7 +241,7 @@ char *strcpy_unsafe(char *destination, const char *source) return ptr; } -char *strncpy(char *destination, const char *source, unsigned long num) +EXTERNC char *strncpy(char *destination, const char *source, unsigned long num) { if (destination == NULL) return NULL; @@ -411,14 +256,14 @@ char *strncpy(char *destination, const char *source, unsigned long num) return ptr; } -int strcmp(const char *l, const char *r) +EXTERNC int strcmp(const char *l, const char *r) { for (; *l == *r && *l; l++, r++) ; return *(unsigned char *)l - *(unsigned char *)r; } -char *strstr(const char *haystack, const char *needle) +EXTERNC char *strstr(const char *haystack, const char *needle) { const char *a = haystack, *b = needle; while (1) @@ -435,7 +280,7 @@ char *strstr(const char *haystack, const char *needle) } } -char *strchr(const char *String, int Char) +EXTERNC char *strchr(const char *String, int Char) { while (*String != (char)Char) { @@ -445,24 +290,24 @@ char *strchr(const char *String, int Char) return (char *)String; } -char *strdup(const char *String) +EXTERNC char *strdup(const char *String) { - char *OutBuffer = kmalloc(strlen((char *)String) + 1); + char *OutBuffer = (char *)kmalloc(strlen((char *)String) + 1); strncpy(OutBuffer, String, strlen(String) + 1); return OutBuffer; } -int isalpha(int c) +EXTERNC int isalpha(int c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } -int isupper(int c) +EXTERNC int isupper(int c) { return (c >= 'A' && c <= 'Z'); } -long int strtol(const char *str, char **endptr, int base) +EXTERNC long int strtol(const char *str, char **endptr, int base) { const char *s; long acc, cutoff; @@ -527,7 +372,7 @@ long int strtol(const char *str, char **endptr, int base) return (acc); } -unsigned long int strtoul(const char *str, char **endptr, int base) +EXTERNC unsigned long int strtoul(const char *str, char **endptr, int base) { const char *s; unsigned long acc, cutoff; @@ -592,17 +437,17 @@ unsigned long int strtoul(const char *str, char **endptr, int base) return (acc); } -int isdigit(int c) +EXTERNC int isdigit(int c) { return c >= '0' && c <= '9'; } -int isspace(int c) +EXTERNC int isspace(int c) { return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == '\v'; } -int isempty(char *str) +EXTERNC int isempty(char *str) { if (strlen(str) == 0) return 1; @@ -615,7 +460,7 @@ int isempty(char *str) return 1; } -unsigned int isdelim(char c, char *delim) +EXTERNC unsigned int isdelim(char c, char *delim) { while (*delim != '\0') { @@ -626,23 +471,23 @@ unsigned int isdelim(char c, char *delim) return 0; } -int abs(int i) { return i < 0 ? -i : i; } +EXTERNC int abs(int i) { return i < 0 ? -i : i; } -void swap(char *x, char *y) +EXTERNC void swap(char *x, char *y) { char t = *x; *x = *y; *y = t; } -char *reverse(char *Buffer, int i, int j) +EXTERNC char *reverse(char *Buffer, int i, int j) { while (i < j) swap(&Buffer[i++], &Buffer[j--]); return Buffer; } -float sqrtf(float x) +EXTERNC float sqrtf(float x) { if (x < 0.0f) return NAN; @@ -660,7 +505,7 @@ float sqrtf(float x) return guess; } -double clamp(double x, double low, double high) +EXTERNC double clamp(double x, double low, double high) { if (x < low) return low; @@ -670,25 +515,25 @@ double clamp(double x, double low, double high) return x; } -float lerp(float a, float b, float t) +EXTERNC float lerp(float a, float b, float t) { return (1 - t) * a + t * b; } -float smoothstep(float a, float b, float t) +EXTERNC float smoothstep(float a, float b, float t) { t = clamp(t, 0.0, 1.0); return lerp(a, b, t * t * (3 - 2 * t)); } -float cubicInterpolate(float a, float b, float t) +EXTERNC float cubicInterpolate(float a, float b, float t) { float t2 = t * t; float t3 = t2 * t; return a + (-2 * t3 + 3 * t2) * b; } -char *strtok(char *src, const char *delim) +EXTERNC char *strtok(char *src, const char *delim) { static char *src1; if (!src) @@ -728,7 +573,7 @@ char *strtok(char *src, const char *delim) return NULL; } -int atoi(const char *String) +EXTERNC int atoi(const char *String) { uint64_t Length = strlen((char *)String); uint64_t OutBuffer = 0; @@ -741,7 +586,7 @@ int atoi(const char *String) return OutBuffer; } -double atof(const char *String) +EXTERNC double atof(const char *String) { // Originally from https://github.com/GaloisInc/minlibc/blob/master/atof.c /* @@ -823,7 +668,7 @@ double atof(const char *String) return a; } -char *itoa(int Value, char *Buffer, int Base) +EXTERNC char *itoa(int Value, char *Buffer, int Base) { if (Base < 2 || Base > 32) return Buffer; @@ -851,7 +696,7 @@ char *itoa(int Value, char *Buffer, int Base) return reverse(Buffer, 0, i - 1); } -char *ltoa(long Value, char *Buffer, int Base) +EXTERNC char *ltoa(long Value, char *Buffer, int Base) { if (Base < 2 || Base > 32) return Buffer; @@ -879,7 +724,7 @@ char *ltoa(long Value, char *Buffer, int Base) return reverse(Buffer, 0, i - 1); } -char *ultoa(unsigned long Value, char *Buffer, int Base) +EXTERNC char *ultoa(unsigned long Value, char *Buffer, int Base) { if (Base < 2 || Base > 32) return Buffer; @@ -904,7 +749,7 @@ char *ultoa(unsigned long Value, char *Buffer, int Base) return reverse(Buffer, 0, i - 1); } -extern void __chk_fail(void) __attribute__((__noreturn__)); +EXTERNC void __chk_fail(void) __attribute__((__noreturn__)); __noreturn static inline void __convert_chk_fail(void) { @@ -918,7 +763,7 @@ __noreturn static inline void __convert_chk_fail(void) // #define DBG_CHK 1 -__no_stack_protector void *__memcpy_chk(void *dest, const void *src, size_t len, size_t slen) +EXTERNC __no_stack_protector void *__memcpy_chk(void *dest, const void *src, size_t len, size_t slen) { #ifdef DBG_CHK debug("( dest:%#lx src:%#lx len:%llu slen:%llu )", dest, src, len, slen); @@ -949,10 +794,36 @@ __no_stack_protector void *__memcpy_chk(void *dest, const void *src, size_t len, if (unlikely(len > slen)) __chk_fail(); - return memcpy_unsafe(dest, src, len); + + switch (CPU::CheckSIMD()) + { + case CPU::x86SIMDType::SIMD_SSE: + return memcpy_sse(dest, src, len); + break; + case CPU::x86SIMDType::SIMD_SSE2: + return memcpy_sse2(dest, src, len); + break; + case CPU::x86SIMDType::SIMD_SSE3: + return memcpy_sse3(dest, src, len); + break; + case CPU::x86SIMDType::SIMD_SSSE3: + return memcpy_ssse3(dest, src, len); + break; + case CPU::x86SIMDType::SIMD_SSE41: + return memcpy_sse4_1(dest, src, len); + break; + case CPU::x86SIMDType::SIMD_SSE42: + return memcpy_sse4_2(dest, src, len); + break; + default: + return memcpy_unsafe(dest, src, len); + break; + } + error("Should not be here!"); + CPU::Stop(); } -__no_stack_protector void *__memset_chk(void *dest, int val, size_t len, size_t slen) +EXTERNC __no_stack_protector void *__memset_chk(void *dest, int val, size_t len, size_t slen) { #ifdef DBG_CHK debug("( dest:%#lx val:%#x len:%llu slen:%llu )", dest, val, len, slen); @@ -980,7 +851,7 @@ __no_stack_protector void *__memset_chk(void *dest, int val, size_t len, size_t return memset_unsafe(dest, val, len); } -__no_stack_protector void *__memmove_chk(void *dest, const void *src, size_t len, size_t slen) +EXTERNC __no_stack_protector void *__memmove_chk(void *dest, const void *src, size_t len, size_t slen) { #ifdef DBG_CHK debug("( dest:%#lx src:%#lx len:%llu slen:%llu )", dest, src, len, slen); @@ -1014,7 +885,7 @@ __no_stack_protector void *__memmove_chk(void *dest, const void *src, size_t len return memmove_unsafe(dest, src, len); } -__no_stack_protector char *__strcat_chk(char *dest, const char *src, size_t slen) +EXTERNC __no_stack_protector char *__strcat_chk(char *dest, const char *src, size_t slen) { #ifdef DBG_CHK debug("( dest:%#lx src:%#lx slen:%llu )", dest, src, slen); @@ -1043,7 +914,7 @@ __no_stack_protector char *__strcat_chk(char *dest, const char *src, size_t slen return strcat_unsafe(dest, src); } -__no_stack_protector char *__strcpy_chk(char *dest, const char *src, size_t slen) +EXTERNC __no_stack_protector char *__strcpy_chk(char *dest, const char *src, size_t slen) { #ifdef DBG_CHK debug("( dest:%#lx src:%#lx slen:%llu )", dest, src, slen); @@ -1070,5 +941,6 @@ __no_stack_protector char *__strcpy_chk(char *dest, const char *src, size_t slen if (unlikely(len >= slen)) __chk_fail(); + return strcpy_unsafe(dest, src); } diff --git a/Library/memcpy.c b/Library/memcpy.c new file mode 100644 index 0000000..c4adb18 --- /dev/null +++ b/Library/memcpy.c @@ -0,0 +1,320 @@ +#include + +#include +#include +#include + +/* Some of the functions are from musl library */ +/* https://www.musl-libc.org/ */ +/* +Copyright © 2005-2020 Rich Felker, et al. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +void *memcpy_unsafe(void *dest, const void *src, size_t n) +{ + unsigned char *d = dest; + const unsigned char *s = src; + +#ifdef __GNUC__ + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define LS >> +#define RS << +#else +#define LS << +#define RS >> +#endif + + typedef uint32_t __attribute__((__may_alias__)) u32; + uint32_t w, x; + + for (; (uintptr_t)s % 4 && n; n--) + *d++ = *s++; + + if ((uintptr_t)d % 4 == 0) + { + for (; n >= 16; s += 16, d += 16, n -= 16) + { + *(u32 *)(d + 0) = *(u32 *)(s + 0); + *(u32 *)(d + 4) = *(u32 *)(s + 4); + *(u32 *)(d + 8) = *(u32 *)(s + 8); + *(u32 *)(d + 12) = *(u32 *)(s + 12); + } + if (n & 8) + { + *(u32 *)(d + 0) = *(u32 *)(s + 0); + *(u32 *)(d + 4) = *(u32 *)(s + 4); + d += 8; + s += 8; + } + if (n & 4) + { + *(u32 *)(d + 0) = *(u32 *)(s + 0); + d += 4; + s += 4; + } + if (n & 2) + { + *d++ = *s++; + *d++ = *s++; + } + if (n & 1) + { + *d = *s; + } + return dest; + } + + if (n >= 32) + switch ((uintptr_t)d % 4) + { + case 1: + w = *(u32 *)s; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + n -= 3; + for (; n >= 17; s += 16, d += 16, n -= 16) + { + x = *(u32 *)(s + 1); + *(u32 *)(d + 0) = (w LS 24) | (x RS 8); + w = *(u32 *)(s + 5); + *(u32 *)(d + 4) = (x LS 24) | (w RS 8); + x = *(u32 *)(s + 9); + *(u32 *)(d + 8) = (w LS 24) | (x RS 8); + w = *(u32 *)(s + 13); + *(u32 *)(d + 12) = (x LS 24) | (w RS 8); + } + break; + case 2: + w = *(u32 *)s; + *d++ = *s++; + *d++ = *s++; + n -= 2; + for (; n >= 18; s += 16, d += 16, n -= 16) + { + x = *(u32 *)(s + 2); + *(u32 *)(d + 0) = (w LS 16) | (x RS 16); + w = *(u32 *)(s + 6); + *(u32 *)(d + 4) = (x LS 16) | (w RS 16); + x = *(u32 *)(s + 10); + *(u32 *)(d + 8) = (w LS 16) | (x RS 16); + w = *(u32 *)(s + 14); + *(u32 *)(d + 12) = (x LS 16) | (w RS 16); + } + break; + case 3: + w = *(u32 *)s; + *d++ = *s++; + n -= 1; + for (; n >= 19; s += 16, d += 16, n -= 16) + { + x = *(u32 *)(s + 3); + *(u32 *)(d + 0) = (w LS 8) | (x RS 24); + w = *(u32 *)(s + 7); + *(u32 *)(d + 4) = (x LS 8) | (w RS 24); + x = *(u32 *)(s + 11); + *(u32 *)(d + 8) = (w LS 8) | (x RS 24); + w = *(u32 *)(s + 15); + *(u32 *)(d + 12) = (x LS 8) | (w RS 24); + } + break; + } + if (n & 16) + { + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + } + if (n & 8) + { + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + } + if (n & 4) + { + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + } + if (n & 2) + { + *d++ = *s++; + *d++ = *s++; + } + if (n & 1) + { + *d = *s; + } + return dest; +#endif + + for (; n; n--) + *d++ = *s++; + return dest; +} + +void *memset_unsafe(void *dest, int c, size_t n) +{ + unsigned char *s = dest; + size_t k; + + if (!n) + return dest; + s[0] = c; + s[n - 1] = c; + if (n <= 2) + return dest; + s[1] = c; + s[2] = c; + s[n - 2] = c; + s[n - 3] = c; + if (n <= 6) + return dest; + s[3] = c; + s[n - 4] = c; + if (n <= 8) + return dest; + + k = -(uintptr_t)s & 3; + s += k; + n -= k; + n &= -4; + +#ifdef __GNUC__ + typedef uint32_t __attribute__((__may_alias__)) u32; + typedef uint64_t __attribute__((__may_alias__)) u64; + + u32 c32 = ((u32)-1) / 255 * (unsigned char)c; + *(u32 *)(s + 0) = c32; + *(u32 *)(s + n - 4) = c32; + if (n <= 8) + return dest; + *(u32 *)(s + 4) = c32; + *(u32 *)(s + 8) = c32; + *(u32 *)(s + n - 12) = c32; + *(u32 *)(s + n - 8) = c32; + if (n <= 24) + return dest; + *(u32 *)(s + 12) = c32; + *(u32 *)(s + 16) = c32; + *(u32 *)(s + 20) = c32; + *(u32 *)(s + 24) = c32; + *(u32 *)(s + n - 28) = c32; + *(u32 *)(s + n - 24) = c32; + *(u32 *)(s + n - 20) = c32; + *(u32 *)(s + n - 16) = c32; + + k = 24 + ((uintptr_t)s & 4); + s += k; + n -= k; + + u64 c64 = c32 | ((u64)c32 << 32); + for (; n >= 32; n -= 32, s += 32) + { + *(u64 *)(s + 0) = c64; + *(u64 *)(s + 8) = c64; + *(u64 *)(s + 16) = c64; + *(u64 *)(s + 24) = c64; + } +#else + for (; n; n--, s++) + *s = c; +#endif + + return dest; +} + +void *memmove_unsafe(void *dest, const void *src, size_t n) +{ +#ifdef __GNUC__ + typedef __attribute__((__may_alias__)) size_t WT; +#define WS (sizeof(WT)) +#endif + + char *d = dest; + const char *s = src; + + if (d == s) + return d; + if ((uintptr_t)s - (uintptr_t)d - n <= -2 * n) + return memcpy(d, s, n); + + if (d < s) + { +#ifdef __GNUC__ + if ((uintptr_t)s % WS == (uintptr_t)d % WS) + { + while ((uintptr_t)d % WS) + { + if (!n--) + return dest; + *d++ = *s++; + } + for (; n >= WS; n -= WS, d += WS, s += WS) + *(WT *)d = *(WT *)s; + } +#endif + for (; n; n--) + *d++ = *s++; + } + else + { +#ifdef __GNUC__ + if ((uintptr_t)s % WS == (uintptr_t)d % WS) + { + while ((uintptr_t)(d + n) % WS) + { + if (!n--) + return dest; + d[n] = s[n]; + } + while (n >= WS) + n -= WS, *(WT *)(d + n) = *(WT *)(s + n); + } +#endif + while (n) + n--, d[n] = s[n]; + } + + return dest; +} diff --git a/Tasking/Task.cpp b/Tasking/Task.cpp index 6548d9b..e880e58 100644 --- a/Tasking/Task.cpp +++ b/Tasking/Task.cpp @@ -788,8 +788,8 @@ namespace Tasking Thread->ExitCode = 0xdead; Thread->Status = TaskStatus::Ready; Thread->Memory = new Memory::MemMgr(Parent->PageTable); - Thread->FPU = (FXState *)Thread->Memory->RequestPages(TO_PAGES(sizeof(FXState))); - memset(Thread->FPU, 0, FROM_PAGES(TO_PAGES(sizeof(FXState)))); + Thread->FPU = (CPU::x64::FXState *)Thread->Memory->RequestPages(TO_PAGES(sizeof(CPU::x64::FXState))); + memset(Thread->FPU, 0, FROM_PAGES(TO_PAGES(sizeof(CPU::x64::FXState)))); // TODO: Is really a good idea to use the FPU in kernel mode? Thread->FPU->mxcsr = 0b0001111110000000; @@ -1169,10 +1169,37 @@ namespace Tasking debug("Created Kernel Process: %s and Thread: %s", kproc->Name, kthrd->Name); TaskingLock.Lock(__FUNCTION__); + bool MONITORSupported = false; + if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_AMD) == 0) + { #if defined(__amd64__) - uint32_t rax, rbx, rcx, rdx; - CPU::x64::cpuid(0x1, &rax, &rbx, &rcx, &rdx); - if (rcx & CPU::x64::CPUID_FEAT_RCX_MONITOR) + CPU::x64::AMD::CPUID0x1 cpuid1amd; +#elif defined(__i386__) + CPU::x32::AMD::CPUID0x1 cpuid1amd; +#endif +#if defined(__amd64__) || defined(__i386__) + asmv("cpuid" + : "=a"(cpuid1amd.EAX.raw), "=b"(cpuid1amd.EBX.raw), "=c"(cpuid1amd.ECX.raw), "=d"(cpuid1amd.EDX.raw) + : "a"(0x1)); +#endif + MONITORSupported = cpuid1amd.ECX.MONITOR; + } + else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0) + { +#if defined(__amd64__) + CPU::x64::Intel::CPUID0x1 cpuid1intel; +#elif defined(__i386__) + CPU::x32::Intel::CPUID0x1 cpuid1intel; +#endif +#if defined(__amd64__) || defined(__i386__) + asmv("cpuid" + : "=a"(cpuid1intel.EAX.raw), "=b"(cpuid1intel.EBX.raw), "=c"(cpuid1intel.ECX.raw), "=d"(cpuid1intel.EDX.raw) + : "a"(0x1)); +#endif + MONITORSupported = cpuid1intel.ECX.MONITOR; + } + + if (MONITORSupported) { trace("CPU has MONITOR/MWAIT support."); } @@ -1182,7 +1209,7 @@ namespace Tasking error("Interrupts are not enabled."); CPU::Interrupts(CPU::Enable); } -#endif + TaskingLock.Unlock(); IdleProcess = CreateProcess(nullptr, (char *)"Idle", TaskTrustLevel::Idle); for (int i = 0; i < SMP::CPUCores; i++) diff --git a/Tests/RandomNumberGenerator.cpp b/Tests/RandomNumberGenerator.cpp index 418ff30..412b23b 100644 --- a/Tests/RandomNumberGenerator.cpp +++ b/Tests/RandomNumberGenerator.cpp @@ -21,7 +21,7 @@ __constructor void TestRandom() #endif RDRANDFlag = cpuid1amd.ECX.RDRAND; } - if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0) + else if (strcmp(CPU::Vendor(), x86_CPUID_VENDOR_INTEL) == 0) { #if defined(__amd64__) CPU::x64::Intel::CPUID0x1 cpuid1intel; diff --git a/include/cpu.hpp b/include/cpu.hpp index 21df48b..0d75723 100644 --- a/include/cpu.hpp +++ b/include/cpu.hpp @@ -123,6 +123,29 @@ namespace CPU SIMD_SSSE3, SIMD_SSE41, SIMD_SSE42, + SIMD_AVX, + SIMD_AVX2, + SIMD_AVX512F, + SIMD_AVX512BW, + SIMD_AVX512CD, + SIMD_AVX512DQ, + SIMD_AVX512ER, + SIMD_AVX512IFMA, + SIMD_AVX512PF, + SIMD_AVX512VBMI, + SIMD_AVX512VL, + SIMD_AVX512VNNI, + SIMD_AVX512BITALG, + SIMD_AVX512VPOPCNTDQ, + SIMD_AVX512_4VNNIW, + SIMD_AVX512_4FMAPS, + SIMD_AVX512_VP2INTERSECT, + SIMD_AVX512_BF16, + SIMD_AVX512_VBMI2, + SIMD_AVX512_GFNI, + SIMD_AVX512_VAES, + SIMD_AVX512_VPCLMULQDQ, + SIMD_AVX512_VNNI, }; /** @@ -147,7 +170,7 @@ namespace CPU char *Hypervisor(); /** - * @brief Check SIMD support. + * @brief Check SIMD support. It will return the highest supported SIMD type. * * @return x86SIMDType */ @@ -227,11 +250,25 @@ namespace CPU void *PageTable(void *PT = nullptr); /** @brief To be used only once. */ - void InitializeFeatures(); + void InitializeFeatures(long Core); /** @brief Get CPU counter value. */ uintptr_t Counter(); + typedef int __v4si __attribute__((__vector_size__(16))); + + typedef union + { + __v4si vector; + long long int i64[2]; + int i32[4]; + short i16[8]; + char i8[16]; + int __attribute__((__vector_size__(16))) m128i_i32; + short __attribute__((__vector_size__(16))) m128i_i16; + char __attribute__((__vector_size__(16))) m128i_i8; + } __m128i; + namespace MemBar { SafeFunction static inline void Barrier() @@ -552,6 +589,32 @@ namespace CPU uint32_t raw; } DR7; + struct FXState + { + /** @brief FPU control word */ + uint16_t fcw; + /** @brief FPU status word */ + uint16_t fsw; + /** @brief FPU tag words */ + uint8_t ftw; + /** @brief Reserved (zero) */ + uint8_t Reserved; + /** @brief FPU opcode */ + uint16_t fop; + /** @brief PFU instruction pointer */ + uint64_t rip; + /** @brief FPU data pointer */ + uint64_t rdp; + /** @brief SSE control register */ + uint32_t mxcsr; + /** @brief SSE control register mask */ + uint32_t mxcsrmask; + /** @brief FPU registers (last 6 bytes reserved) */ + uint8_t st[8][16]; + /** @brief XMM registers */ + uint8_t xmm[16][16]; + } __attribute__((packed)); + /** * @brief CPUID * @@ -2125,79 +2188,6 @@ namespace CPU namespace x64 { - enum CPUIDFeatures - { - CPUID_FEAT_RCX_SSE3 = 1 << 0, - CPUID_FEAT_RCX_PCLMULQDQ = 1 << 1, - CPUID_FEAT_RCX_DTES64 = 1 << 2, - CPUID_FEAT_RCX_MONITOR = 1 << 3, - CPUID_FEAT_RCX_DS_CPL = 1 << 4, - CPUID_FEAT_RCX_VMX = 1 << 5, - CPUID_FEAT_RCX_SMX = 1 << 6, - CPUID_FEAT_RCX_EST = 1 << 7, - CPUID_FEAT_RCX_TM2 = 1 << 8, - CPUID_FEAT_RCX_SSSE3 = 1 << 9, - CPUID_FEAT_RCX_CID = 1 << 10, - CPUID_FEAT_RCX_FMA = 1 << 12, - CPUID_FEAT_RCX_CX16 = 1 << 13, - CPUID_FEAT_RCX_ETPRD = 1 << 14, - CPUID_FEAT_RCX_PDCM = 1 << 15, - CPUID_FEAT_RCX_PCIDE = 1 << 17, - CPUID_FEAT_RCX_DCA = 1 << 18, - CPUID_FEAT_RCX_SSE4_1 = 1 << 19, - CPUID_FEAT_RCX_SSE4_2 = 1 << 20, - CPUID_FEAT_RCX_x2APIC = 1 << 21, - CPUID_FEAT_RCX_MOVBE = 1 << 22, - CPUID_FEAT_RCX_POPCNT = 1 << 23, - CPUID_FEAT_RCX_AES = 1 << 25, - CPUID_FEAT_RCX_XSAVE = 1 << 26, - CPUID_FEAT_RCX_OSXSAVE = 1 << 27, - CPUID_FEAT_RCX_AVX = 1 << 28, - CPUID_FEAT_RCX_F16C = 1 << 29, - CPUID_FEAT_RCX_RDRAND = 1 << 30, - - CPUID_FEAT_RDX_FPU = 1 << 0, - CPUID_FEAT_RDX_VME = 1 << 1, - CPUID_FEAT_RDX_DE = 1 << 2, - CPUID_FEAT_RDX_PSE = 1 << 3, - CPUID_FEAT_RDX_TSC = 1 << 4, - CPUID_FEAT_RDX_MSR = 1 << 5, - CPUID_FEAT_RDX_PAE = 1 << 6, - CPUID_FEAT_RDX_MCE = 1 << 7, - CPUID_FEAT_RDX_CX8 = 1 << 8, - CPUID_FEAT_RDX_APIC = 1 << 9, - CPUID_FEAT_RDX_SEP = 1 << 11, - CPUID_FEAT_RDX_MTRR = 1 << 12, - CPUID_FEAT_RDX_PGE = 1 << 13, - CPUID_FEAT_RDX_MCA = 1 << 14, - CPUID_FEAT_RDX_CMOV = 1 << 15, - CPUID_FEAT_RDX_PAT = 1 << 16, - CPUID_FEAT_RDX_PSE36 = 1 << 17, - CPUID_FEAT_RDX_PSN = 1 << 18, - CPUID_FEAT_RDX_CLF = 1 << 19, - CPUID_FEAT_RDX_DTES = 1 << 21, - CPUID_FEAT_RDX_ACPI = 1 << 22, - CPUID_FEAT_RDX_MMX = 1 << 23, - CPUID_FEAT_RDX_FXSR = 1 << 24, - CPUID_FEAT_RDX_SSE = 1 << 25, - CPUID_FEAT_RDX_SSE2 = 1 << 26, - CPUID_FEAT_RDX_SS = 1 << 27, - CPUID_FEAT_RDX_HTT = 1 << 28, - CPUID_FEAT_RDX_TM1 = 1 << 29, - CPUID_FEAT_RDX_IA64 = 1 << 30, - CPUID_FEAT_RDX_PBE = 1 << 31, - - // ? Not sure how to get it. - CPUID_FEAT_RDX_SMEP = 1 << 7, - CPUID_FEAT_RDX_UMIP = 1 << 2, - CPUID_FEAT_RDX_SYSCALL = 1 << 11, - CPUID_FEAT_XD = 1 << 20, - CPUID_FEAT_1GB_PAGE = 1 << 26, - CPUID_FEAT_RDTSCP = 1 << 27, - CPUID_FEAT_LONG_MODE = 1 << 29, - CPUID_FEAT_RDX_SMAP = (1 << 20) - }; - enum MSRID { MSR_MONITOR_FILTER_SIZE = 0x6, @@ -3110,6 +3100,41 @@ namespace CPU uint64_t raw; } CR8; + typedef union XCR0 + { + /* + On https://wiki.osdev.org/CPU_Registers_x86#XCR0 says that the PKRU bit is 9? + */ + struct + { + /** @brief X87 FPU/MMX/SSE Support (must be 1) */ + uint64_t X87 : 1; + /** @brief XSAVE support for MXCSR and XMM registers */ + uint64_t SSE : 1; + /** @brief AVX support for YMM registers */ + uint64_t AVX : 1; + /** @brief MPX support for BND registers */ + uint64_t BNDREG : 1; + /** @brief MPX support for BNDCFGU and BNDSTATUS registers */ + uint64_t BNDCSR : 1; + /** @brief AVX-512 support for opmask registers */ + uint64_t OpMask : 1; + /** @brief AVX-512 enabled and XSAVE support for upper halves of lower ZMM registers */ + uint64_t ZMM_HI256 : 1; + /** @brief AVX-512 enabled and XSAVE support for upper ZMM registers */ + uint64_t HI16_ZMM : 1; + /** @brief XSAVE support for PKRU register */ + uint64_t PKRU : 1; + /** @brief Reserved */ + uint64_t Reserved0 : 53; + /** @brief AMD lightweight profiling */ + uint64_t LWP : 1; + /** @brief Reserved */ + uint64_t Reserved1 : 1; + }; + uint64_t raw; + } XCR0; + typedef union EFER { struct @@ -3233,6 +3258,32 @@ namespace CPU uint64_t raw; } SelectorErrorCode; + struct FXState + { + /** @brief FPU control word */ + uint16_t fcw; + /** @brief FPU status word */ + uint16_t fsw; + /** @brief FPU tag words */ + uint8_t ftw; + /** @brief Reserved (zero) */ + uint8_t Reserved; + /** @brief FPU opcode */ + uint16_t fop; + /** @brief PFU instruction pointer */ + uint64_t rip; + /** @brief FPU data pointer */ + uint64_t rdp; + /** @brief SSE control register */ + uint32_t mxcsr; + /** @brief SSE control register mask */ + uint32_t mxcsrmask; + /** @brief FPU registers (last 6 bytes reserved) */ + uint8_t st[8][16]; + /** @brief XMM registers */ + uint8_t xmm[16][16]; + } __attribute__((packed)); + SafeFunction static inline void lgdt(void *gdt) { #if defined(__amd64__) @@ -3375,6 +3426,18 @@ namespace CPU return (CR8){.raw = Result}; } + SafeFunction static inline XCR0 readxcr0() + { + uint64_t Result = 0; +#if defined(__amd64__) + asmv("xgetbv" + : "=a"(Result) + : "c"(0) + : "edx"); +#endif + return (XCR0){.raw = Result}; + } + SafeFunction static inline void writecr0(CR0 ControlRegister) { #if defined(__amd64__) @@ -3425,6 +3488,16 @@ namespace CPU #endif } + SafeFunction static inline void writexcr0(XCR0 ControlRegister) + { +#if defined(__amd64__) + asmv("xsetbv" + : + : "a"(ControlRegister.raw), "c"(0) + : "edx"); +#endif + } + SafeFunction static inline void fxsave(void *FXSaveArea) { #if defined(__amd64__) @@ -3836,6 +3909,320 @@ namespace CPU } EDX; }; + /** @brief Extended feature flags enumeration */ + struct CPUID0x7_0 + { + union + { + struct + { + uint64_t Reserved : 32; + }; + uint64_t raw; + } EAX; + + union + { + struct + { + /** @brief Access to base of fs and gs */ + uint64_t FSGSBase : 1; + /** @brief IA32_TSC_ADJUST MSR */ + uint64_t IA32TSCAdjust : 1; + /** @brief Software Guard Extensions */ + uint64_t SGX : 1; + /** @brief Bit Manipulation Instruction Set 1 */ + uint64_t BMI1 : 1; + /** @brief TSX Hardware Lock Elision */ + uint64_t HLE : 1; + /** @brief Advanced Vector Extensions 2 */ + uint64_t AVX2 : 1; + /** @brief FDP_EXCPTN_ONLY */ + uint64_t FDPExcptonOnly : 1; + /** @brief Supervisor Mode Execution Protection */ + uint64_t SMEP : 1; + /** @brief Bit Manipulation Instruction Set 2 */ + uint64_t BMI2 : 1; + /** @brief Enhanced REP MOVSB/STOSB */ + uint64_t ERMS : 1; + /** @brief INVPCID */ + uint64_t INVPCID : 1; + /** @brief RTM */ + uint64_t RTM : 1; + /** @brief Intel Resource Director Monitoring */ + uint64_t RDT_M : 1; + /** @brief Deprecates FPU CS and DS values */ + uint64_t DeprecatesFPU : 1; + /** @brief Intel Memory Protection Extensions */ + uint64_t MPX : 1; + /** @brief Intel Resource Director Allocation */ + uint64_t RDT_A : 1; + /** @brief AVX-512 Foundation */ + uint64_t AVX512F : 1; + /** @brief AVX-512 Doubleword and Quadword Instructions */ + uint64_t AVX512DQ : 1; + /** @brief RDSEED */ + uint64_t RDSEED : 1; + /** @brief Intel Multi-Precision Add-Carry Instruction Extensions */ + uint64_t ADX : 1; + /** @brief Supervisor Mode Access Prevention */ + uint64_t SMAP : 1; + /** @brief AVX-512 Integer Fused Multiply-Add Instructions */ + uint64_t AVX512IFMA : 1; + /** @brief Reserved */ + uint64_t Reserved : 1; + /** @brief CLFLUSHOPT */ + uint64_t CLFLUSHOPT : 1; + /** @brief CLWB */ + uint64_t CLWB : 1; + /** @brief Intel Processor Trace */ + uint64_t IntelProcessorTrace : 1; + /** @brief AVX-512 Prefetch Instructions */ + uint64_t AVX512PF : 1; + /** @brief AVX-512 Exponential and Reciprocal Instructions */ + uint64_t AVX512ER : 1; + /** @brief AVX-512 Conflict Detection Instructions */ + uint64_t AVX512CD : 1; + /** @brief SHA Extensions */ + uint64_t SHA : 1; + /** @brief AVX-512 Byte and Word Instructions */ + uint64_t AVX512BW : 1; + /** @brief AVX-512 Vector Length Extensions */ + uint64_t AVX512VL : 1; + }; + uint64_t raw; + } EBX; + + union + { + struct + { + /** @brief PREFETCHWT1 */ + uint64_t PREFETCHWT1 : 1; + /** @brief AVX-512 Vector Bit Manipulation Instructions */ + uint64_t AVX512VBMI : 1; + /** @brief User Mode Instruction Prevention */ + uint64_t UMIP : 1; + /** @brief Memory Protection Keys for User-mode pages */ + uint64_t PKU : 1; + /** @brief PKU enabled by OS */ + uint64_t OSPKE : 1; + /** @brief Timed pause and user-level monitor/wait */ + uint64_t WaitPKG : 1; + /** @brief AVX-512 Vector Bit Manipulation Instructions 2 */ + uint64_t AVX512VBMI2 : 1; + /** @brief Control flow enforcement (CET) shadow stack */ + uint64_t CET_SS : 1; + /** @brief Galois Field instructions */ + uint64_t GFNI : 1; + /** @brief Vector AES instruction set (VEX-256/EVEX) */ + uint64_t VAES : 1; + /** @brief CLMUL instruction set (VEX-256/EVEX) */ + uint64_t VPCLMULQDQ : 1; + /** @brief AVX-512 Vector Neural Network Instructions */ + uint64_t AVX512VNNI : 1; + /** @brief AVX-512 Bit Algorithms Instructions */ + uint64_t AVX512BITALG : 1; + /** @brief IA32_TME related MSRs */ + uint64_t TME : 1; + /** @brief AVX-512 Vector Population Count Double and Quad-word */ + uint64_t AVX512VPOPCNTDQ : 1; + /** @brief Reserved */ + uint64_t Reserved0 : 1; + /** @brief 5-level paging (57 address bits) */ + uint64_t LA57 : 1; + /** @brief The value of userspace MPX Address-Width Adjust used by the BNDLDX and BNDSTX Intel MPX instructions in 64-bit mode */ + uint64_t MAWAU : 5; + /** @brief Read Processor ID and IA32_TSC_AUX */ + uint64_t RDPID : 1; + /** @brief Key Locker */ + uint64_t KL : 1; + /** @brief BUS_LOCK_DETECT */ + uint64_t BusLockDetect : 1; + /** @brief Cache line demote */ + uint64_t CLDEMOTE : 1; + /** @brief Reserved */ + uint64_t Reserved1 : 1; + /** @brief MOVDIRI */ + uint64_t MOVDIRI : 1; + /** @brief MOVDIR64B */ + uint64_t MOVDIR64B : 1; + /** @brief SGX Launch Configuration */ + uint64_t SGX_LC : 1; + /** @brief Protection Keys for Supervisor-mode pages */ + uint64_t PKS : 1; + }; + uint64_t raw; + } ECX; + + union + { + struct + { + /** @brief Reserved */ + uint64_t Reserved0 : 2; + /** @brief AVX-512 4-register Neural Network Instructions */ + uint64_t AVX512_4VNNIW : 1; + /** @brief AVX-512 4-register Multiply Accumulation Single Precision */ + uint64_t AVX512_4FMAPS : 1; + /** @brief Fast Short REP MOVSB/STOSB */ + uint64_t FSRM : 1; + /** @brief User Inter-Processor Interrupts */ + uint64_t UINTR : 1; + /** @brief Reserved */ + uint64_t Reserved1 : 2; + /** @brief AVX-512 VP2INTERSECT Doubleword and Quadword Instructions */ + uint64_t AVX512_VP2INTERSECT : 1; + /** @brief Special Register Buffer Data Sampling Mitigations */ + uint64_t SRBDS_CTRL : 1; + /** @brief VERW instruction clears CPU buffers */ + uint64_t MC_CLEAR : 1; + /** @brief All TSX transactions are aborted */ + uint64_t TSX_FORCE_ABORT : 1; + /** @brief Reserved */ + uint64_t Reserved2 : 1; + /** @brief TSX_FORCE_ABORT MSR is available */ + uint64_t TsxForceAbortMsr : 1; + /** @brief SERIALIZE */ + uint64_t SERIALIZE : 1; + /** @brief Mixture of CPU types in processor topology */ + uint64_t HYBRID : 1; + /** @brief TSXLDTRK */ + uint64_t TSXLDTRK : 1; + /** @brief Reserved */ + uint64_t Reserved3 : 1; + /** @brief Platform configuration for Memory Encryption Technologies Instrctuions */ + uint64_t PCONFIG : 1; + /** @brief Architectural Last Branch Records */ + uint64_t LBR : 1; + /** @brief Control flow enforcement (CET) indirect branch tracking */ + uint64_t CET_IBT : 1; + /** @brief Reserved */ + uint64_t Reserved4 : 1; + /** @brief Tile computation on bfloat16 numbers */ + uint64_t AMX_BF16 : 1; + /** @brief AVX512-FP16 half-precision floating-point instructions */ + uint64_t AVX512_FP16 : 1; + /** @brief Tile architecture */ + uint64_t AMX_TILE : 1; + /** @brief Tile computation on 8-bit integers */ + uint64_t AMX_INT8 : 1; + /** @brief Speculation Control, part of Indirect Branch Control (IBC): + Indirect Branch Restricted Speculation (IBRS) and + Indirect Branch Prediction Barrier (IBPB) */ + uint64_t SPEC_CTRL : 1; + /** @brief Single Thread Indirect Branch Predictor, part of IBC */ + uint64_t STIBP : 1; + /** @brief IA32_FLUSH_CMD MSR */ + uint64_t L1D_FLUSH : 1; + /** @brief IA32_ARCH_CAPABILITIES (lists speculative side channel mitigations */ + uint64_t ArchCapabilities : 1; + /** @brief IA32_CORE_CAPABILITIES MSR (lists model-specific core capabilities) */ + uint64_t CoreCapabilities : 1; + /** @brief Speculative Store Bypass Disable, as mitigation for Speculative Store Bypass (IA32_SPEC_CTRL) */ + uint64_t SSBD : 1; + }; + uint64_t raw; + } EDX; + }; + + /** @brief Extended feature flags enumeration */ + struct CPUID0x7_1 + { + union + { + struct + { + uint64_t Reserved0 : 3; + /** @brief RAO-INT */ + uint64_t RAO_INT : 1; + /** @brief AVX Vector Neural Network Instructions (XNNI) (VEX encoded) */ + uint64_t AVX_VNNI : 1; + /** @brief AVX-512 instructions for bfloat16 numbers */ + uint64_t AVX512_BF16 : 1; + /** @brief Reserved */ + uint64_t Reserved1 : 1; + /** @brief CMPccXADD */ + uint64_t CMPCCXADD : 1; + /** @brief Architectural Performance Monitoring Extended Leaf (EAX=23h) */ + uint64_t ARCHPERFMONEXT : 1; + /** @brief Reserved */ + uint64_t Reserved2 : 1; + /** @brief Fast zero-length MOVSB */ + uint64_t FAST_ZERO_REP_MOVSB : 1; + /** @brief Fast zero-length STOSB */ + uint64_t FAST_SHORT_REP_STOSB : 1; + /** @brief Fast zero-length CMPSB and SCASB */ + uint64_t FAST_SHORT_REP_CMPSB_SCASB : 1; + /** @brief Reserved */ + uint64_t Reserved3 : 4; + /** @brief Flexible Return and Event Delivery */ + uint64_t FRED : 1; + /** @brief LKGS Instruction */ + uint64_t LKGS : 1; + /** @brief WRMSRNS instruction */ + uint64_t WRMSRNS : 1; + /** @brief Reserved */ + uint64_t Reserved4 : 1; + /** @brief AMX instructions for FP16 numbers */ + uint64_t AMX_FP16 : 1; + /** @brief HRESET instruction, IA32_HRESET_ENABLE MSR, and Processor History Reset Leaf (EAX=20h) */ + uint64_t HRESET : 1; + /** @brief AVX IFMA instructions */ + uint64_t AVX_IFMA : 1; + /** @brief Reserved */ + uint64_t Reserved5 : 2; + /** @brief Linear Address Masking */ + uint64_t LAM : 1; + /** @brief RDMSRLIST and WRMSRLIST instructions, and the IA32_BARRIER MSR */ + uint64_t MSRLIST : 1; + }; + uint64_t raw; + } EAX; + + union + { + struct + { + /** @brief IA32_PPIN and IA32_PPIN_CTL MSRs */ + uint64_t PPIN : 1; + /** @brief Reserved */ + uint64_t Reserved : 31; + }; + uint64_t raw; + } EBX; + + union + { + struct + { + /** @brief Reserved */ + uint64_t Reserved : 32; + }; + uint64_t raw; + } ECX; + + union + { + struct + { + /** @brief Reserved */ + uint64_t Reserved0 : 4; + /** @brief AVX VNNI INT8 instructions */ + uint64_t AVX_VNNI_INT8 : 1; + /** @brief AVX NE CONVERT instructions */ + uint64_t AVX_NE_CONVERT : 1; + /** @brief Reserved */ + uint64_t Reserved1 : 8; + /** @brief PREFETCHIT0 and PREFETCHIT1 instructions */ + uint64_t PREFETCHIT : 1; + /** @brief Reserved */ + uint64_t Reserved2 : 17; + }; + uint64_t raw; + } EDX; + }; + /** @brief Performance monitors */ struct CPUID0xA { @@ -4661,8 +5048,9 @@ namespace CPU { struct { - uint64_t BranchID : 16; - uint64_t Reserved0 : 16; + uint64_t BrandId : 16; + uint64_t Reserved0 : 12; + uint64_t PkgType : 4; }; uint64_t raw; } EBX; @@ -4674,9 +5062,28 @@ namespace CPU uint64_t LAHF_SAHF : 1; uint64_t CmpLegacy : 1; uint64_t SVM : 1; - uint64_t Reserved0 : 1; + uint64_t ExtApicSpace : 1; uint64_t AltMovCr8 : 1; - uint64_t Reserved1 : 26; + uint64_t ABM : 1; + uint64_t SSE4A : 1; + uint64_t MisalignedSSE : 1; + uint64_t ThreeDNowPrefetch : 1; + uint64_t OSVW : 1; + uint64_t IBS : 1; + uint64_t XOP : 1; + uint64_t SKINIT : 1; + uint64_t WDT : 1; + uint64_t Reserved0 : 1; + uint64_t LWP : 1; + uint64_t FMA4 : 1; + uint64_t Reserved1 : 1; + uint64_t Reserved2 : 1; + uint64_t NodeID : 1; + uint64_t Reserved3 : 1; + uint64_t TBM : 1; + uint64_t TopologyExtensions : 1; + uint64_t Reserved4 : 9; + }; uint64_t raw; } ECX; diff --git a/include/smp.hpp b/include/smp.hpp index 1dec7a8..a02f082 100644 --- a/include/smp.hpp +++ b/include/smp.hpp @@ -11,7 +11,7 @@ struct CPUArchData { #if defined(__amd64__) - int stub; + CPU::x64::FXState *FPU; /* TODO */ #elif defined(__i386__) #elif defined(__aarch64__) @@ -39,7 +39,7 @@ struct CPUData Tasking::TCB *CurrentThread; /** @brief Architecture-specific data. */ - CPUArchData *Data; + CPUArchData Data; /** @brief Checksum. Used to verify the integrity of the data. Must be equal to CPU_DATA_CHECKSUM (0xC0FFEE). */ int Checksum; } __attribute__((packed)); diff --git a/include/task.hpp b/include/task.hpp index a5141bf..755fddb 100644 --- a/include/task.hpp +++ b/include/task.hpp @@ -57,32 +57,6 @@ namespace Tasking Terminated }; - struct FXState - { - /** @brief FPU control word */ - uint16_t fcw; - /** @brief FPU status word */ - uint16_t fsw; - /** @brief FPU tag words */ - uint8_t ftw; - /** @brief Reserved (zero) */ - uint8_t Reserved; - /** @brief FPU opcode */ - uint16_t fop; - /** @brief PFU instruction pointer */ - uint64_t rip; - /** @brief FPU data pointer */ - uint64_t rdp; - /** @brief SSE control register */ - uint32_t mxcsr; - /** @brief SSE control register mask */ - uint32_t mxcsrmask; - /** @brief FPU registers (last 6 bytes reserved) */ - uint8_t st[8][16]; - /** @brief XMM registers */ - uint8_t xmm[16][16]; - } __attribute__((packed)); - struct TaskSecurity { TaskTrustLevel TrustLevel; @@ -130,7 +104,7 @@ namespace Tasking uintptr_t IPHistory[128]; TaskSecurity Security; TaskInfo Info; - FXState *FPU; + CPU::x64::FXState *FPU; void Rename(const char *name) {