kernel/syscalls: Add experimental native system calls

Signed-off-by: EnderIce2 <enderice2@protonmail.com>
This commit is contained in:
EnderIce2 2024-12-20 04:11:01 +02:00
parent cbd671292d
commit 79d267631a
Signed by: enderice2
GPG Key ID: EACC3AD603BAB4DD
5 changed files with 1544 additions and 126 deletions

File diff suppressed because it is too large Load Diff

View File

@ -42,7 +42,7 @@ int SpawnInit()
const char *argv[4] = {
Config.InitPath,
// "--help",
"--kernel",
nullptr};
Tasking::TaskCompatibility compat = Tasking::Native;

212
Kernel/syscalls/mem.cpp Normal file
View File

@ -0,0 +1,212 @@
/*
This file is part of Fennix Kernel.
Fennix Kernel is free software: you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.
Fennix Kernel is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Fennix Kernel. If not, see <https://www.gnu.org/licenses/>.
*/
#include <interface/syscalls.h>
#include <syscalls.hpp>
#include <memory.hpp>
#include <lock.hpp>
#include <exec.hpp>
#include <errno.h>
#include <debug.h>
#include "../kernel.h"
using Tasking::PCB;
using Tasking::TCB;
/**
 * brk system call (native ABI).
 *
 * Not implemented: the requested break address is ignored and the call
 * always fails.
 *
 * @param end_data Requested new end of the data segment (unused).
 * @return -ENOSYS.
 */
int sys_brk(SysFrm *, void *end_data)
{
    (void)end_data;
    return -ENOSYS;
}
void *sys_mmap(SysFrm *, void *addr, size_t length, int prot, int flags, int fd, off_t offset)
{
if (length == 0)
return (void *)-EINVAL;
bool p_None = prot & __SYS_PROT_NONE;
bool p_Read = prot & __SYS_PROT_READ;
bool p_Write = prot & __SYS_PROT_WRITE;
bool p_Exec = prot & __SYS_PROT_EXEC;
bool m_Shared = flags & __SYS_MAP_SHARED;
bool m_Private = flags & __SYS_MAP_PRIVATE;
bool m_Fixed = flags & __SYS_MAP_FIXED;
bool m_Anon = flags & __SYS_MAP_ANONYMOUS;
UNUSED(p_None);
UNUSED(m_Anon);
debug("None:%d Read:%d Write:%d Exec:%d",
p_None, p_Read, p_Write, p_Exec);
debug("Shared:%d Private:%d Fixed:%d Anon:%d",
m_Shared, m_Private, m_Fixed, m_Anon);
int unknownFlags = flags & ~(__SYS_MAP_SHARED | __SYS_MAP_PRIVATE |
__SYS_MAP_FIXED | __SYS_MAP_ANONYMOUS);
if (unknownFlags)
{
/* We still have some flags missing afaik... */
fixme("Unknown flags: %x", unknownFlags);
/* FIXME: Continue? */
}
if (offset % PAGE_SIZE)
return (void *)-EINVAL;
if (uintptr_t(addr) % PAGE_SIZE && m_Fixed)
return (void *)-EINVAL;
if ((m_Shared && m_Private) ||
(!m_Shared && !m_Private))
return (void *)-EINVAL;
PCB *pcb = thisProcess;
Memory::VirtualMemoryArea *vma = pcb->vma;
if (fd != -1 && !m_Anon)
{
fixme("File mapping not fully implemented");
vfs::FileDescriptorTable *fdt = pcb->FileDescriptors;
auto _fd = fdt->FileMap.find(fd);
if (_fd == fdt->FileMap.end())
{
debug("Invalid file descriptor %d", fd);
return (void *)-EBADF;
}
if (p_Read)
{
void *pBuf = vma->RequestPages(TO_PAGES(length));
debug("created buffer at %#lx-%#lx",
pBuf, (uintptr_t)pBuf + length);
uintptr_t mFlags = Memory::US;
if (p_Write)
mFlags |= Memory::RW;
if (m_Fixed)
{
if (m_Shared)
return (void *)-ENOSYS;
int mRet = vma->Map(addr, pBuf, length, mFlags);
if (mRet < 0)
{
debug("Failed to map file: %s", strerror(mRet));
return (void *)(uintptr_t)mRet;
}
off_t oldOff = fdt->usr_lseek(fd, 0, SEEK_CUR);
fdt->usr_lseek(fd, offset, SEEK_SET);
ssize_t ret = fdt->usr_read(fd, pBuf, length);
fdt->usr_lseek(fd, oldOff, SEEK_SET);
if (ret < 0)
{
debug("Failed to read file");
return (void *)ret;
}
return addr;
}
else
{
int mRet = vma->Map(pBuf, pBuf, length, mFlags);
if (mRet < 0)
{
debug("Failed to map file: %s", strerror(mRet));
return (void *)(uintptr_t)mRet;
}
}
off_t oldOff = fdt->usr_lseek(fd, 0, SEEK_CUR);
fdt->usr_lseek(fd, offset, SEEK_SET);
ssize_t ret = fdt->usr_read(fd, pBuf, length);
fdt->usr_lseek(fd, oldOff, SEEK_SET);
if (ret < 0)
{
debug("Failed to read file");
return (void *)ret;
}
return pBuf;
}
debug("???");
return (void *)-ENOSYS;
}
if (length < PAGE_SIZE * 100)
{
debug("length < 100 pages");
if (addr == nullptr)
{
addr = vma->RequestPages(TO_PAGES(length), true);
debug("Allocated %#lx-%#lx for pt %#lx",
addr, (uintptr_t)addr + length, vma->Table);
return addr;
}
void *pAddr = vma->RequestPages(TO_PAGES(length));
if (pAddr == nullptr)
{
debug("Failed to request pages");
return (void *)-ENOMEM;
}
uintptr_t mapFlags = 0;
if (p_Read)
mapFlags |= Memory::PTFlag::US;
if (p_Write)
mapFlags |= Memory::PTFlag::RW;
// if (p_Exec)
// mapFlags |= Memory::PTFlag::XD;
vma->Map(addr, pAddr, length, mapFlags);
debug("mapped region %#lx-%#lx to %#lx-%#lx",
pAddr, (uintptr_t)pAddr + length, addr, (uintptr_t)addr + length);
return addr;
}
debug("Creating CoWRegion");
void *ret = vma->CreateCoWRegion(addr, length,
p_Read, p_Write, p_Exec,
m_Fixed, m_Shared);
debug("ret: %#lx", ret);
return (void *)ret;
}
/**
 * munmap system call (native ABI).
 *
 * Placeholder: nothing is unmapped and every argument is ignored.
 *
 * @return 0 unconditionally.
 */
int sys_munmap(SysFrm *Frame, void *addr, size_t length)
{
    (void)Frame;
    (void)addr;
    (void)length;
    return 0;
}
/**
 * mprotect system call (native ABI).
 *
 * Placeholder: no protection is changed and every argument is ignored.
 *
 * @return 0 unconditionally.
 */
int sys_mprotect(SysFrm *Frame, void *addr, size_t length, int prot)
{
    (void)Frame;
    (void)addr;
    (void)length;
    (void)prot;
    return 0;
}
/**
 * madvise system call (native ABI).
 *
 * Placeholder: the advice is not acted upon and every argument is ignored.
 *
 * @return 0 unconditionally.
 */
int sys_madvise(SysFrm *Frame, void *addr, size_t length, int advice)
{
    (void)Frame;
    (void)addr;
    (void)length;
    (void)advice;
    return 0;
}

View File

@ -30,123 +30,287 @@ struct SyscallData
{
const char *Name;
void *Handler;
int RequiredID;
};
using namespace Memory;
using Tasking::PCB;
using Tasking::TCB;
/* Architecture-dependent signed integer type: long on amd64, int on i386.
   aarch64 is covered as well so the type exists on every architecture this
   file has a branch for. */
#if defined(__amd64__)
typedef long arch_t;
#elif defined(__i386__)
typedef int arch_t;
#elif defined(__aarch64__)
typedef long arch_t;
#endif
/** SYS_API_VERSION handler: the requested version is ignored; returns 0. */
static int sys_api_version(SysFrm *Frame, int version)
{
    (void)Frame;
    (void)version;
    return 0;
}

/** Placeholder handler that does nothing and returns 0. */
static int sys_dummy(SysFrm *Frame)
{
    (void)Frame;
    return 0;
}
void sys_0() { stub; }
void sys_1() { stub; }
/**
 * read system call (native ABI).
 *
 * Passes the user buffer through UserCheckAndGetAddress() (a null result is
 * reported as -EFAULT), reads via the process's file descriptor table and,
 * when the read did not fail, moves the descriptor position forward by the
 * number of bytes read.
 *
 * @return Result of usr_read(), or -EFAULT.
 *
 * NOTE(review): this span comes from a rendered diff without +/- markers.
 * The sc_MaxSyscall define and the NativeSyscallsTable initializer below
 * appear to be removed-side lines interleaved with the new function body -
 * confirm against the real file.
 */
static ssize_t sys_read(SysFrm *Frame, int fildes, void *buf, size_t nbyte)
{
PCB *pcb = thisProcess;
Memory::VirtualMemoryArea *vma = pcb->vma;
/* NOTE(review): presumably a removed diff line, not part of sys_read. */
#define sc_MaxSyscall 2
void *pBuf = vma->UserCheckAndGetAddress(buf, nbyte);
if (pBuf == nullptr)
return -EFAULT;
/* NOTE(review): presumably removed diff lines (old two-entry table). */
static SyscallData NativeSyscallsTable[sc_MaxSyscall] = {
[0] = {
"0 syscall",
(void *)sys_0,
UINT16_MAX,
},
[1] = {
"1 syscall",
(void *)sys_1,
UINT16_MAX,
}};
vfs::FileDescriptorTable *fdt = pcb->FileDescriptors;
ssize_t ret = fdt->usr_read(fildes, pBuf, nbyte);
/* Advance the file position only when the read did not fail. */
if (ret >= 0)
fdt->usr_lseek(fildes, ret, SEEK_CUR);
return ret;
}
/**
 * pread system call (native ABI): read at an explicit file offset.
 *
 * @return Result of usr_pread(), or -EFAULT when UserCheckAndGetAddress()
 *         yields null for the buffer.
 */
static ssize_t sys_pread(SysFrm *Frame, int fildes, void *buf, size_t nbyte, off_t offset)
{
    PCB *proc = thisProcess;
    Memory::VirtualMemoryArea *space = proc->vma;

    void *checkedBuf = space->UserCheckAndGetAddress(buf, nbyte);
    if (!checkedBuf)
        return -EFAULT;

    vfs::FileDescriptorTable *files = proc->FileDescriptors;
    ssize_t result = files->usr_pread(fildes, checkedBuf, nbyte, offset);
    return result;
}
/**
 * write system call (native ABI).
 *
 * Passes the user buffer through UserCheckAndGetAddress() (a null result is
 * reported as -EFAULT), writes via the process's file descriptor table and
 * moves the descriptor position forward by the number of bytes written.
 *
 * @return Result of usr_write(), or -EFAULT.
 */
static ssize_t sys_write(SysFrm *Frame, int fildes, const void *buf, size_t nbyte)
{
    PCB *pcb = thisProcess;
    Memory::VirtualMemoryArea *vma = pcb->vma;

    const void *pBuf = vma->UserCheckAndGetAddress(buf, nbyte);
    if (pBuf == nullptr)
        return -EFAULT;

    vfs::FileDescriptorTable *fdt = pcb->FileDescriptors;
    ssize_t ret = fdt->usr_write(fildes, pBuf, nbyte);

    /* Only advance after bytes were actually written. A negative (error)
       result must not be used as a seek distance, which would move the
       position backwards. */
    if (ret > 0)
        fdt->usr_lseek(fildes, ret, SEEK_CUR);
    return ret;
}
/**
 * pwrite system call (native ABI): write at an explicit file offset.
 *
 * @return Result of usr_pwrite(), or -EFAULT when UserCheckAndGetAddress()
 *         yields null for the buffer.
 */
static ssize_t sys_pwrite(SysFrm *Frame, int fildes, const void *buf, size_t nbyte, off_t offset)
{
    PCB *proc = thisProcess;
    Memory::VirtualMemoryArea *space = proc->vma;

    const void *checkedBuf = space->UserCheckAndGetAddress(buf, nbyte);
    if (!checkedBuf)
        return -EFAULT;

    vfs::FileDescriptorTable *files = proc->FileDescriptors;
    ssize_t result = files->usr_pwrite(fildes, checkedBuf, nbyte, offset);
    return result;
}
/**
 * open system call (native ABI).
 *
 * When the O_DIRECTORY bit is set the path is resolved first so that a
 * missing entry (-ENOENT) or a non-directory (-ENOTDIR) is reported before
 * the descriptor table is asked to open it.
 *
 * @return Result of usr_open(), or -EFAULT / -ENOENT / -ENOTDIR.
 */
static int sys_open(SysFrm *Frame, const char *pathname, int flags, mode_t mode)
{
    /* O_DIRECTORY */
    const int directoryFlag = 0200000;

    PCB *proc = thisProcess;
    Memory::VirtualMemoryArea *space = proc->vma;

    const char *path = space->UserCheckAndGetAddress(pathname, PAGE_SIZE);
    if (!path)
        return -EFAULT;

    debug("%s, %d, %d", path, flags, mode);

    if ((flags & directoryFlag) != 0)
    {
        FileNode *target = fs->GetByPath(path, proc->CWD);
        if (!target)
        {
            debug("Couldn't find %s", path);
            return -ENOENT;
        }

        if (!target->IsDirectory())
        {
            debug("%s is not a directory", path);
            return -ENOTDIR;
        }
    }

    vfs::FileDescriptorTable *files = proc->FileDescriptors;
    int descriptor = files->usr_open(path, flags, mode);
    return descriptor;
}
/**
 * close system call (native ABI).
 *
 * @return Result of usr_close() on the calling process's descriptor table.
 */
static int sys_close(SysFrm *Frame, int fd)
{
    vfs::FileDescriptorTable *files = thisProcess->FileDescriptors;
    int result = files->usr_close(fd);
    return result;
}
/** ioctl: not implemented; always fails with -ENOSYS. */
static int sys_ioctl(SysFrm *Frame, int fd, unsigned long request, void *argp)
{
    return -ENOSYS;
}

/** stat: not implemented; always fails with -ENOSYS. */
static int sys_stat(SysFrm *Frame, const char *pathname, struct stat *statbuf)
{
    return -ENOSYS;
}

/** fstat: not implemented; always fails with -ENOSYS. */
static int sys_fstat(SysFrm *Frame, int fd, struct stat *statbuf)
{
    return -ENOSYS;
}

/** lstat: not implemented; always fails with -ENOSYS. */
static int sys_lstat(SysFrm *Frame, const char *pathname, struct stat *statbuf)
{
    return -ENOSYS;
}
/**
 * access system call (native ABI).
 *
 * Only existence is checked; `mode` is logged but not evaluated (the
 * remaining work is marked with `stub`).
 *
 * @return 0 when the path exists, -ENOENT when it does not, -EFAULT when
 *         UserCheckAndGetAddress() yields null for the path.
 */
static int sys_access(SysFrm *Frame, const char *pathname, int mode)
{
    PCB *proc = thisProcess;
    Memory::VirtualMemoryArea *space = proc->vma;

    auto path = space->UserCheckAndGetAddress(pathname);
    if (path == nullptr)
        return -EFAULT;

    debug("access(%s, %d)", (char *)path, mode);

    if (fs->PathExists(path, proc->CWD))
    {
        stub;
        return 0;
    }

    return -ENOENT;
}
/** truncate: not implemented; always fails with -ENOSYS. */
static int sys_truncate(SysFrm *Frame, const char *pathname, off_t length)
{
    return -ENOSYS;
}

/** ftruncate: not implemented; always fails with -ENOSYS. */
static int sys_ftruncate(SysFrm *Frame, int fd, off_t length)
{
    return -ENOSYS;
}
/**
 * exit system call (native ABI).
 *
 * Logs the exit, marks the calling thread as a zombie, records its exit code
 * and then yields forever; the function never returns to the caller.
 *
 * @param status Exit code stored on the thread.
 */
static __noreturn void sys_exit(SysFrm *Frame, int status)
{
    TCB *self = thisThread;

    /* Second value in the log line: magnitude of the status, shown in hex. */
    const int magnitude = status < 0 ? -status : status;

    {
        /* Scoped guard around the state change; it ends with this block. */
        CriticalSection guard;
        trace("Userspace thread %s(%d) exited with code %d (%#x)",
              self->Name,
              self->ID, status,
              magnitude);

        self->SetState(Tasking::Zombie);
        self->SetExitCode(status);
    }

    for (;;)
        self->GetContext()->Yield();
    __builtin_unreachable();
}
/** fork: not implemented; always fails with -ENOSYS. */
static pid_t sys_fork(SysFrm *Frame)
{
    return -ENOSYS;
}

/** execve: not implemented; always fails with -ENOSYS. */
static int sys_execve(SysFrm *Frame, const char *pathname, char *const argv[], char *const envp[])
{
    return -ENOSYS;
}

/** getpid: not implemented; always fails with -ENOSYS. */
static pid_t sys_getpid(SysFrm *Frame)
{
    return -ENOSYS;
}

/** getppid: not implemented; always fails with -ENOSYS. */
static pid_t sys_getppid(SysFrm *Frame)
{
    return -ENOSYS;
}
static pid_t sys_waitpid(pid_t pid, int *wstatus, int options) { return -ENOSYS; }
/** kill: not implemented; always fails with -ENOSYS. */
static int sys_kill(SysFrm *Frame, pid_t pid, int sig)
{
    return -ENOSYS;
}
/* Memory-management handlers. They are declared here without `static`, so
   their definitions are expected in another translation unit. */
int sys_brk(SysFrm *Frame, void *end_data);
void *sys_mmap(SysFrm *Frame, void *addr, size_t length, int prot, int flags, int fd, off_t offset);
int sys_munmap(SysFrm *Frame, void *addr, size_t length);
int sys_mprotect(SysFrm *Frame, void *addr, size_t length, int prot);
int sys_madvise(SysFrm *Frame, void *addr, size_t length, int advice);
/* Communication handlers: none of these are implemented yet; each one
   ignores its arguments and fails with -ENOSYS. */

/** pipe: not implemented. */
static int sys_pipe(SysFrm *Frame, int pipefd[2])
{
    return -ENOSYS;
}

/** dup: not implemented. */
static int sys_dup(SysFrm *Frame, int oldfd)
{
    return -ENOSYS;
}

/** dup2: not implemented. */
static int sys_dup2(SysFrm *Frame, int oldfd, int newfd)
{
    return -ENOSYS;
}

/** socket: not implemented. */
static int sys_socket(SysFrm *Frame, int domain, int type, int protocol)
{
    return -ENOSYS;
}

/** bind: not implemented. */
static int sys_bind(SysFrm *Frame, int sockfd, const struct sockaddr *addr, __SYS_socklen_t addrlen)
{
    return -ENOSYS;
}

/** connect: not implemented. */
static int sys_connect(SysFrm *Frame, int sockfd, const struct sockaddr *addr, __SYS_socklen_t addrlen)
{
    return -ENOSYS;
}

/** listen: not implemented. */
static int sys_listen(SysFrm *Frame, int sockfd, int backlog)
{
    return -ENOSYS;
}

/** accept: not implemented. */
static int sys_accept(SysFrm *Frame, int sockfd, struct sockaddr *addr, __SYS_socklen_t *addrlen)
{
    return -ENOSYS;
}

/** send: not implemented. */
static ssize_t sys_send(SysFrm *Frame, int sockfd, const void *buf, size_t len, int flags)
{
    return -ENOSYS;
}

/** recv: not implemented. */
static ssize_t sys_recv(SysFrm *Frame, int sockfd, void *buf, size_t len, int flags)
{
    return -ENOSYS;
}

/** shutdown: not implemented. */
static int sys_shutdown(SysFrm *Frame, int sockfd, int how)
{
    return -ENOSYS;
}
/* Time handlers: none of these are implemented yet; each one ignores its
   arguments and fails with -ENOSYS. */

/** time: not implemented. */
static time_t sys_time(SysFrm *Frame, time_t *t)
{
    return -ENOSYS;
}

/** clock_gettime: not implemented. */
static int sys_clock_gettime(SysFrm *Frame, __SYS_clockid_t clockid, struct timespec *tp)
{
    return -ENOSYS;
}

/** clock_settime: not implemented. */
static int sys_clock_settime(SysFrm *Frame, __SYS_clockid_t clockid, const struct timespec *tp)
{
    return -ENOSYS;
}

/** nanosleep: not implemented. */
static int sys_nanosleep(SysFrm *Frame, const struct timespec *req, struct timespec *rem)
{
    return -ENOSYS;
}
/* Miscellaneous handlers: none of these are implemented yet; each one
   ignores its arguments and fails with -ENOSYS. */

/** getcwd: not implemented; the error code is returned cast to a pointer. */
static char *sys_getcwd(SysFrm *Frame, char *buf, size_t size)
{
    return (char *)-ENOSYS;
}

/** chdir: not implemented. */
static int sys_chdir(SysFrm *Frame, const char *path)
{
    return -ENOSYS;
}

/** mkdir: not implemented. */
static int sys_mkdir(SysFrm *Frame, const char *path, mode_t mode)
{
    return -ENOSYS;
}

/** rmdir: not implemented. */
static int sys_rmdir(SysFrm *Frame, const char *path)
{
    return -ENOSYS;
}

/** unlink: not implemented. */
static int sys_unlink(SysFrm *Frame, const char *pathname)
{
    return -ENOSYS;
}

/** rename: not implemented. */
static int sys_rename(SysFrm *Frame, const char *oldpath, const char *newpath)
{
    return -ENOSYS;
}
/* Native syscall table, indexed by syscall number. Slots that are never
   assigned keep a null handler. */
static SyscallData scTbl[SYS_MAX] = {};

/**
 * Fills scTbl at start-up (runs as a constructor).
 *
 * Each entry stores the syscall's name and its handler; the remaining
 * SyscallData field is left value-initialized.
 */
__constructor void __init_native_syscalls(void)
{
/* Registers `fn` under `id`, using the spelling of `id` as the entry name. */
#define SC_ENTRY(id, fn) scTbl[id] = {#id, (void *)(fn)}

    /* Initialization */
    SC_ENTRY(SYS_API_VERSION, sys_api_version);
    scTbl[1] = {"dummy", (void *)sys_dummy};

    /* I/O */
    SC_ENTRY(SYS_READ, sys_read);
    SC_ENTRY(SYS_PREAD, sys_pread);
    SC_ENTRY(SYS_WRITE, sys_write);
    SC_ENTRY(SYS_PWRITE, sys_pwrite);
    SC_ENTRY(SYS_OPEN, sys_open);
    SC_ENTRY(SYS_CLOSE, sys_close);
    SC_ENTRY(SYS_IOCTL, sys_ioctl);

    /* File Status */
    SC_ENTRY(SYS_STAT, sys_stat);
    SC_ENTRY(SYS_FSTAT, sys_fstat);
    SC_ENTRY(SYS_LSTAT, sys_lstat);
    SC_ENTRY(SYS_ACCESS, sys_access);
    SC_ENTRY(SYS_TRUNCATE, sys_truncate);
    SC_ENTRY(SYS_FTRUNCATE, sys_ftruncate);

    /* Process Control */
    SC_ENTRY(SYS_EXIT, sys_exit);
    SC_ENTRY(SYS_FORK, sys_fork);
    SC_ENTRY(SYS_EXECVE, sys_execve);
    SC_ENTRY(SYS_GETPID, sys_getpid);
    SC_ENTRY(SYS_GETPPID, sys_getppid);
    SC_ENTRY(SYS_WAITPID, sys_waitpid);
    SC_ENTRY(SYS_KILL, sys_kill);

    /* Memory */
    SC_ENTRY(SYS_BRK, sys_brk);
    SC_ENTRY(SYS_MMAP, sys_mmap);
    SC_ENTRY(SYS_MUNMAP, sys_munmap);
    SC_ENTRY(SYS_MPROTECT, sys_mprotect);
    SC_ENTRY(SYS_MADVISE, sys_madvise);

    /* Communication */
    SC_ENTRY(SYS_PIPE, sys_pipe);
    SC_ENTRY(SYS_DUP, sys_dup);
    SC_ENTRY(SYS_DUP2, sys_dup2);
    SC_ENTRY(SYS_SOCKET, sys_socket);
    SC_ENTRY(SYS_BIND, sys_bind);
    SC_ENTRY(SYS_CONNECT, sys_connect);
    SC_ENTRY(SYS_LISTEN, sys_listen);
    SC_ENTRY(SYS_ACCEPT, sys_accept);
    SC_ENTRY(SYS_SEND, sys_send);
    SC_ENTRY(SYS_RECV, sys_recv);
    SC_ENTRY(SYS_SHUTDOWN, sys_shutdown);

    /* Time */
    SC_ENTRY(SYS_TIME, sys_time);
    SC_ENTRY(SYS_CLOCK_GETTIME, sys_clock_gettime);
    SC_ENTRY(SYS_CLOCK_SETTIME, sys_clock_settime);
    SC_ENTRY(SYS_NANOSLEEP, sys_nanosleep);

    /* Miscellaneous */
    SC_ENTRY(SYS_GETCWD, sys_getcwd);
    SC_ENTRY(SYS_CHDIR, sys_chdir);
    SC_ENTRY(SYS_MKDIR, sys_mkdir);
    SC_ENTRY(SYS_RMDIR, sys_rmdir);
    SC_ENTRY(SYS_UNLINK, sys_unlink);
    SC_ENTRY(SYS_RENAME, sys_rename);

#undef SC_ENTRY
}
/**
 * Native system call dispatcher.
 *
 * Takes the syscall number from the frame, looks up the matching table
 * entry, casts the stored handler to a variadic function pointer and calls
 * it with the frame followed by six argument values. The handler's result is
 * returned. -ENOSYS is returned when the number fails the range check or the
 * table slot holds no handler.
 *
 * NOTE(review): this span comes from a rendered diff without +/- markers, so
 * old lines (Frame->rax, Syscall, NativeSyscallsTable, sc_MaxSyscall, the
 * RequiredID privilege check and the whole __i386__ branch) appear to be
 * interleaved with their replacements (Frame->ReturnValue(), sc, scTbl,
 * SYS_MAX, Frame->ArgN()). Which preprocessor lines survive in the new
 * version cannot be determined from here - confirm against the real file.
 *
 * NOTE(review): the range checks use '>' while the tables are declared with
 * exactly SYS_MAX (resp. sc_MaxSyscall) elements, so a syscall number equal
 * to the bound passes the check and indexes one element past the end of the
 * table; '>=' looks intended.
 */
uintptr_t HandleNativeSyscalls(SysFrm *Frame)
{
#if defined(__amd64__)
if (unlikely(Frame->rax > sc_MaxSyscall))
if (unlikely(Frame->ReturnValue() > SYS_MAX))
{
fixme("Syscall %ld not implemented.", Frame->rax);
fixme("Syscall %ld not implemented.", Frame->ReturnValue());
return -ENOSYS;
}
SyscallData Syscall = NativeSyscallsTable[Frame->rax];
SyscallData sc = scTbl[Frame->ReturnValue()];
/* Handlers are stored as void *; every one is called through this single
   variadic signature with the frame first. */
uintptr_t (*call)(SysFrm *, uintptr_t, ...) =
r_cst(uintptr_t(*)(SysFrm *, uintptr_t, ...),
Syscall.Handler);
sc.Handler);
/* A null handler means the slot was never registered. */
if (unlikely(!call))
{
error("Syscall %s(%d) not implemented.",
Syscall.Name, Frame->rax);
sc.Name, Frame->ReturnValue());
return -ENOSYS;
}
/* NOTE(review): this privilege check reads Syscall.RequiredID, i.e. the
   old variable name - presumably removed in the new version. */
int euid = thisProcess->Security.Effective.UserID;
int egid = thisProcess->Security.Effective.GroupID;
int reqID = Syscall.RequiredID;
if (euid > reqID || egid > reqID)
{
warn("Process %s(%d) tried to access a system call \"%s\" with insufficient privileges.",
thisProcess->Name, thisProcess->ID, Syscall.Name);
debug("Required: %d; Effective u:%d, g:%d", reqID, euid, egid);
return -EPERM;
}
debug("> [%d:\"%s\"]( %#lx %#lx %#lx %#lx %#lx %#lx )",
Frame->rax, Syscall.Name,
Frame->rdi, Frame->rsi, Frame->rdx,
Frame->r10, Frame->r8, Frame->r9);
Frame->ReturnValue(), sc.Name,
Frame->Arg0(), Frame->Arg1(), Frame->Arg2(),
Frame->Arg3(), Frame->Arg4(), Frame->Arg5());
long sc_ret = call(Frame,
Frame->rdi, Frame->rsi, Frame->rdx,
Frame->r10, Frame->r8, Frame->r9);
Frame->Arg0(), Frame->Arg1(), Frame->Arg2(),
Frame->Arg3(), Frame->Arg4(), Frame->Arg5());
debug("< [%d:\"%s\"] = %d",
Frame->rax, Syscall.Name, sc_ret);
debug("< [%d:\"%s\"] = %ld",
Frame->ReturnValue(), sc.Name, sc_ret);
return sc_ret;
#elif defined(__i386__)
/* NOTE(review): this whole branch uses only the old names
   (NativeSyscallsTable, Syscall, sc_MaxSyscall) - presumably removed. */
if (unlikely(Frame->eax > sc_MaxSyscall))
{
fixme("Syscall %ld not implemented.", Frame->eax);
return -ENOSYS;
}
SyscallData Syscall = NativeSyscallsTable[Frame->eax];
uintptr_t (*call)(SysFrm *, uintptr_t, ...) =
r_cst(uintptr_t(*)(SysFrm *, uintptr_t, ...),
Syscall.Handler);
if (unlikely(!call))
{
error("Syscall %s(%d) not implemented.",
Syscall.Name, Frame->eax);
return -ENOSYS;
}
int euid = thisProcess->Security.Effective.UserID;
int egid = thisProcess->Security.Effective.GroupID;
int reqID = Syscall.RequiredID;
if (euid > reqID || egid > reqID)
{
warn("Process %s(%d) tried to access a system call \"%s\" with insufficient privileges.",
thisProcess->Name, thisProcess->ID, Syscall.Name);
debug("Required: %d; Effective u:%d, g:%d", reqID, euid, egid);
return -EPERM;
}
debug("> [%d:\"%s\"]( %#x %#x %#x %#x %#x %#x )",
Frame->eax, Syscall.Name,
Frame->ebx, Frame->ecx, Frame->edx,
Frame->esi, Frame->edi, Frame->ebp);
int sc_ret = call(Frame,
Frame->ebx, Frame->ecx, Frame->edx,
Frame->esi, Frame->edi, Frame->ebp);
debug("< [%d:\"%s\"] = %d",
Frame->eax, Syscall.Name, sc_ret);
return sc_ret;
#elif defined(__aarch64__)
return -ENOSYS;
#endif
}