From 33f88512d45e030d0ca70e00d968e1cb28a48954 Mon Sep 17 00:00:00 2001
From: RaphProductions <81994075+RaphProductions@users.noreply.github.com>
Date: Sun, 18 May 2025 18:58:22 +0200
Subject: [PATCH] vmm: brokie paging :)

---
 kernel/linker-x86_64.ld       |   8 ++
 kernel/src/arch/aarch64/cpu.c |   8 ++
 kernel/src/arch/cpu.h         |   8 ++
 kernel/src/arch/la64/cpu.c    |   8 ++
 kernel/src/arch/riscv/cpu.c   |   8 ++
 kernel/src/arch/x86_64/cpu.c  |  29 ++++++
 kernel/src/boot/limine.c      |  13 ++-
 kernel/src/boot/limine.h      |   4 +-
 kernel/src/main.c             |   6 +-
 kernel/src/mm/pmm.c           |  32 +++---
 kernel/src/mm/pmm.md          |  78 ---------------
 kernel/src/mm/vmm.c           | 177 ++++++++++++++++++++++++++++++++++
 kernel/src/mm/vmm.h           |  45 +++++++++
 13 files changed, 324 insertions(+), 100 deletions(-)
 delete mode 100644 kernel/src/mm/pmm.md
 create mode 100644 kernel/src/mm/vmm.c
 create mode 100644 kernel/src/mm/vmm.h

diff --git a/kernel/linker-x86_64.ld b/kernel/linker-x86_64.ld
index b8e1485..7dbf287 100644
--- a/kernel/linker-x86_64.ld
+++ b/kernel/linker-x86_64.ld
@@ -25,29 +25,36 @@ SECTIONS
 
     /* Define a section to contain the Limine requests and assign it to its own PHDR */
     .limine_requests : {
+        reqs_start_ld = .;
         KEEP(*(.limine_requests_start))
         KEEP(*(.limine_requests))
         KEEP(*(.limine_requests_end))
+        reqs_end_ld = .;
     } :limine_requests
 
     /* Move to the next memory page for .text */
     . = ALIGN(CONSTANT(MAXPAGESIZE));
 
     .text : {
+        text_start_ld = .;
         *(.text .text.*)
+        text_end_ld = .;
     } :text
 
     /* Move to the next memory page for .rodata */
     . = ALIGN(CONSTANT(MAXPAGESIZE));
 
     .rodata : {
+        rodata_start_ld = .;
         *(.rodata .rodata.*)
+        rodata_end_ld = .;
     } :rodata
 
     /* Move to the next memory page for .data */
     . = ALIGN(CONSTANT(MAXPAGESIZE));
 
     .data : {
+        data_start_ld = .;
         *(.data .data.*)
     } :data
 
@@ -58,6 +65,7 @@ SECTIONS
     .bss : {
         *(.bss .bss.*)
         *(COMMON)
+        data_end_ld = .;
     } :data
 
     /* Discard .note.* and .eh_frame* since they may cause issues on some hosts. */
diff --git a/kernel/src/arch/aarch64/cpu.c b/kernel/src/arch/aarch64/cpu.c
index 48fee8b..f2d3feb 100644
--- a/kernel/src/arch/aarch64/cpu.c
+++ b/kernel/src/arch/aarch64/cpu.c
@@ -12,6 +12,14 @@ void arch_init_stage1() {
 
 }
 
+void cpu_load_pm(pagemap_t pm) {
+
+}
+
+void cpu_invalidate_page(pagemap_t pm, uint64_t vaddr) {
+
+}
+
 void hcf() {
     for (;;) {
         asm ("wfi");
diff --git a/kernel/src/arch/cpu.h b/kernel/src/arch/cpu.h
index 11a28af..8c56fa6 100644
--- a/kernel/src/arch/cpu.h
+++ b/kernel/src/arch/cpu.h
@@ -7,8 +7,16 @@
 
 #pragma once
 
+#include "mm/vmm.h"
+
 // Stage 1 initialization: Core components (such as the GDT & IDT on x86_64...)
 void arch_init_stage1();
 
+// Load a pagemap
+void cpu_load_pm(pagemap_t pm);
+
+// Invalidate a page table entry
+void cpu_invalidate_page(pagemap_t pm, uint64_t vaddr);
+
 // Disable interrupts and halt the system.
 void hcf();
\ No newline at end of file
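
Note on the new reqs_/text_/rodata_/data_ linker symbols: a linker-script symbol has no storage of its own, so C code can only use it for its address. A minimal sketch of how such symbols are typically consumed (the page size of 0x1000 and the helper name are assumptions for illustration, not part of this patch):

    #include <stdint.h>

    /* Declared as incomplete arrays so the symbol name decays to the
     * section boundary address rather than reading a byte at it. */
    extern char text_start_ld[];
    extern char text_end_ld[];

    /* Example: how many 4 KiB pages does .text span? */
    static inline uint64_t text_page_count(void) {
        uint64_t start = (uint64_t)text_start_ld & ~0xfffULL;          /* align down */
        uint64_t end   = ((uint64_t)text_end_ld + 0xfff) & ~0xfffULL;  /* align up   */
        return (end - start) / 0x1000;
    }
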
diff --git a/kernel/src/arch/la64/cpu.c b/kernel/src/arch/la64/cpu.c
index 6abe756..5c41bca 100644
--- a/kernel/src/arch/la64/cpu.c
+++ b/kernel/src/arch/la64/cpu.c
@@ -12,6 +12,14 @@ void arch_init_stage1() {
 
 }
 
+void cpu_load_pm(pagemap_t pm) {
+
+}
+
+void cpu_invalidate_page(pagemap_t pm, uint64_t vaddr) {
+
+}
+
 void hcf() {
     for (;;) {
         asm ("idle 0");
diff --git a/kernel/src/arch/riscv/cpu.c b/kernel/src/arch/riscv/cpu.c
index 658b2a4..3dcdf96 100644
--- a/kernel/src/arch/riscv/cpu.c
+++ b/kernel/src/arch/riscv/cpu.c
@@ -12,6 +12,14 @@ void arch_init_stage1() {
 
 }
 
+void cpu_load_pm(pagemap_t pm) {
+
+}
+
+void cpu_invalidate_page(pagemap_t pm, uint64_t vaddr) {
+
+}
+
 void hcf() {
     for (;;) {
         asm ("wfi");
diff --git a/kernel/src/arch/x86_64/cpu.c b/kernel/src/arch/x86_64/cpu.c
index 99c591a..b54c8e1 100644
--- a/kernel/src/arch/x86_64/cpu.c
+++ b/kernel/src/arch/x86_64/cpu.c
@@ -5,17 +5,46 @@
  * cpu.c - x86_64 CPU control implementation.
  */
+#include "mm/vmm.h"
 
 #if defined (__x86_64__)
 
 #include
 #include
 #include
+#include
 
 void arch_init_stage1() {
     gdt_init();
     idt_init();
 }
 
+void cpu_load_pm(pagemap_t pm) {
+    if (!pm)
+        return;
+
+    __asm__ volatile("mov %0, %%cr3" : : "r"(physical((uint64_t)pm)) : "memory");
+}
+
+static uint64_t read_cr3(void)
+{
+    unsigned long val;
+    asm volatile ( "mov %%cr3, %0" : "=r"(val) );
+    return val;
+}
+
+void cpu_invalidate_page(pagemap_t pm, uint64_t vaddr) {
+    uint64_t cr3 = read_cr3();
+    if (physical((uint64_t)pm) != cr3)
+    {
+        // Load the provided pagemap into CR3, invalidate the page, then restore the previous CR3.
+        cpu_load_pm(pm);
+        asm volatile ( "invlpg (%0)" : : "b"(vaddr) : "memory" );
+        asm volatile ( "mov %0, %%cr3" : : "r"(cr3) : "memory" ); // cr3 already holds a physical address, so write it back directly.
+        return;
+    }
+    asm volatile ( "invlpg (%0)" : : "b"(vaddr) : "memory" );
+}
+
 void hcf() {
     asm ("cli");
     for (;;) {
diff --git a/kernel/src/boot/limine.c b/kernel/src/boot/limine.c
index abcdf0c..2294d64 100644
--- a/kernel/src/boot/limine.c
+++ b/kernel/src/boot/limine.c
@@ -44,6 +44,12 @@ static volatile struct limine_hhdm_request hhdm_req = {
     .revision = 0
 };
 
+__attribute__((used, section(".limine_requests")))
+static volatile struct limine_executable_address_request kaddr_req = {
+    .id = LIMINE_EXECUTABLE_ADDRESS_REQUEST,
+    .revision = 0
+};
+
 __attribute__((used, section(".limine_requests_start")))
 static volatile LIMINE_REQUESTS_START_MARKER;
 
@@ -87,7 +93,6 @@ limine_bootinfo_t *limine_get_bootinfo() {
     return &__limine_bootinfo;
 }
 
-uint64_t limine_get_hhdm_offset()
-{
-    return hhdm_req.response->offset;
-}
\ No newline at end of file
+uint64_t limine_get_hhdm_offset() { return hhdm_req.response->offset; }
+uint64_t limine_get_kernel_vaddr() { return kaddr_req.response->virtual_base; }
+uint64_t limine_get_kernel_paddr() { return kaddr_req.response->physical_base; }
\ No newline at end of file
diff --git a/kernel/src/boot/limine.h b/kernel/src/boot/limine.h
index 3142f9d..96a1a91 100644
--- a/kernel/src/boot/limine.h
+++ b/kernel/src/boot/limine.h
@@ -31,4 +31,6 @@ limine_bootinfo_t *limine_get_bootinfo();
 
 // Get the memory map.
 struct limine_memmap_response *limine_get_memmap();
-uint64_t limine_get_hhdm_offset();
\ No newline at end of file
+uint64_t limine_get_hhdm_offset();
+uint64_t limine_get_kernel_vaddr();
+uint64_t limine_get_kernel_paddr();
\ No newline at end of file
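
Note on the executable-address request: Limine maps the kernel image contiguously, so a kernel virtual address and its physical counterpart differ by a constant offset. This is exactly the `i - kvaddr + kpaddr` relation used by vmm_init() below. A hedged sketch of a helper built on the two new getters (the helper name is illustrative, not part of this patch):

    #include <stdint.h>
    #include "boot/limine.h"

    /* Translate a kernel-image virtual address to its physical address by
     * re-basing it onto the physical load address. Only valid for addresses
     * inside the kernel image itself. */
    static inline uint64_t kernel_virt_to_phys(uint64_t vaddr) {
        return vaddr - limine_get_kernel_vaddr() + limine_get_kernel_paddr();
    }
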
diff --git a/kernel/src/main.c b/kernel/src/main.c
index 05437c7..e6593da 100644
--- a/kernel/src/main.c
+++ b/kernel/src/main.c
@@ -5,6 +5,7 @@
  * main.c - Kernel entry point and initialization.
  */
 
+#include "mm/vmm.h"
 #include
 #include
 #include
@@ -31,11 +32,12 @@ void kmain(void) {
     arch_init_stage1();
 
     pmm_init();
-    uint8_t* mem = pmm_alloc_page() + 0xFFFF800000000000;
+    vmm_init();
+    /*uint8_t* mem = pmm_alloc_page() + 0xFFFF800000000000;
     memcpy(mem, "HelloWorld\0", 11);
     trace("pmm: Read from allocated memory: %s\n", mem);
     pmm_free_page(mem);
-    trace("pmm: Freed memory.\n");
+    trace("pmm: Freed memory.\n");*/
 
     // We're done, just hang...
     hcf();
diff --git a/kernel/src/mm/pmm.c b/kernel/src/mm/pmm.c
index ae39604..e3d9be9 100644
--- a/kernel/src/mm/pmm.c
+++ b/kernel/src/mm/pmm.c
@@ -23,27 +23,29 @@ static pmm_page_t *pmm_free_list_head = NULL;
 
 void pmm_free_page(void *mem) {
     pmm_page_t *page = (pmm_page_t*)mem;
     pmm_page_t *page_hhalf = (pmm_page_t*)higher_half((uint64_t)page);
-    page_hhalf->next = pmm_free_list_head;
-    pmm_free_list_head = page;
+    page_hhalf->next = (pmm_page_t*)higher_half((uint64_t)pmm_free_list_head);
+    pmm_free_list_head = page_hhalf;
 
     pmm_available_pages++;
 }
 
 static void __pmm_steal_pages_from_region_head(int pages) {
-    pmm_region_list_head->length -= PMM_PAGE_SIZE;
-    void *page = (void*)pmm_region_list_head->base +
-                 pmm_region_list_head->length;
-    pmm_free_page(page);
+    for (int i = 0; i < pages; i++) {
+        pmm_region_list_head->length -= PMM_PAGE_SIZE;
+        void *page = (void*)pmm_region_list_head->base +
+                     pmm_region_list_head->length;
+        pmm_free_page(page);
 
-    if (pmm_region_list_head->length == 0)
-    {
-        // If a region is totally consumed,
-        // we can turn it into a free page :)
-        // So our 4kb aren't really lost
-        void *mem = (void*)pmm_region_list_head;
-        pmm_region_list_head = pmm_region_list_head->next;
-
-        pmm_free_page(mem);
+        if (pmm_region_list_head->length == 0)
+        {
+            // If a region is totally consumed,
+            // we can turn it into a free page :)
+            // So our 4kb aren't really lost
+            void *mem = (void*)pmm_region_list_head;
+            pmm_region_list_head = pmm_region_list_head->next;
+
+            pmm_free_page(mem);
+        }
     }
 }
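
Note on the pmm_free_page() change: the free list now chains higher-half pointers, and the list head is itself stored as a higher-half pointer. The matching pop is not part of this diff; the sketch below only illustrates what the allocation side would have to do under that convention (the function name is hypothetical, the refill count of 4 comes from the pmm.md doc removed below, and pmm_alloc_page's real body is not shown here):

    /* Sketch only: assumes the free list holds higher-half pointers and that
     * callers expect a physical address back (main.c adds the HHDM offset itself). */
    void *pmm_alloc_page_sketch(void) {
        if (!pmm_free_list_head)
            __pmm_steal_pages_from_region_head(4);   /* refill from the region list */
        if (!pmm_free_list_head)
            return NULL;                             /* out of memory */

        pmm_page_t *page = pmm_free_list_head;       /* higher-half pointer */
        pmm_free_list_head = page->next;
        pmm_available_pages--;
        return (void *)physical((uint64_t)page);     /* hand back the physical address */
    }
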
diff --git a/kernel/src/mm/pmm.md b/kernel/src/mm/pmm.md
deleted file mode 100644
index a5bad46..0000000
--- a/kernel/src/mm/pmm.md
+++ /dev/null
@@ -1,78 +0,0 @@
-# Soaplin's Physical Memory Manager
-
-The Physical Memory Manager (PMM) in Soaplin uses a lazy-loading design that efficiently manages physical memory pages while minimizing boot time overhead.
-
-## Design Overview
-
-The PMM uses a two-level allocation strategy:
-1. Region List - tracks large blocks of available physical memory
-2. Free Page List - manages individual pages ready for immediate allocation
-
-### Memory Regions
-
-Each memory region is tracked by a `pmm_region_t` structure that contains:
-- Base address of the available memory
-- Length of remaining memory
-- Pointer to next region
-
-The region structure is cleverly stored in the first page of the region itself, making the overhead minimal (just one 4KB page per region).
-When the region has been totally consumed, it's metadata page is turned
-into a free page that can be allocated.
-
-### Free Page List
-
-The free page list is a singly-linked list of individual pages that are ready for immediate allocation. It gets refilled from regions only when needed.
-
-## Lazy Loading
-
-Instead of initializing all free pages at boot time, the PMM:
-1. Only initializes region structures during boot
-2. Adds pages to the free list on-demand
-3. Consumes memory regions gradually as needed
-
-This approach provides several benefits:
-- Very fast boot times regardless of RAM size
-- Memory overhead proportional to number of regions, not total RAM
-- No performance penalty during normal operation
-
-## Memory Organization
-
-Physical memory is organized as follows:
-- Each region's first page contains the region metadata
-- Remaining pages in each region are available for allocation
-- Pages are standard 4KB size
-- Free pages are linked together in the free list
-
-## Usage
-
-The PMM provides three main functions:
-- `pmm_init()` - Initializes the PMM from the bootloader's memory map
-- `pmm_alloc_page()` - Allocates a single 4KB page
-- `pmm_free_page()` - Returns a page to the free list
-
-## Implementation Details
-
-### Region Initialization
-During boot, the PMM:
-1. Receives memory map from Limine
-2. Identifies usable memory regions
-3. Sets up region tracking structures
-4. Calculates total available pages
-
-### Page Allocation
-When allocating pages:
-1. First tries the free list
-2. If free list is empty:
-   - Takes 4 pages from current region
-   - Adds it to free list
-   - Updates region metadata
-   - If the region has been consumed
-     - Let the next region take the head
-     - Free the region's metadata page.
-3. Returns the page to the caller
-
-### Memory Tracking
-The PMM maintains counters for:
-- Total available pages
-- Currently free pages
-This allows for memory usage monitoring and OOM detection.
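
Note on the page-table walk used by vmm.c below: every mapping function splits the virtual address into four 9-bit indices, one per paging level. A small worked example of that split (the virtual address is arbitrary, chosen only for illustration):

    #include <stdint.h>

    /* For vaddr = 0xFFFFFFFF80201000, a typical higher-half kernel address: */
    static void show_indices(void) {
        uint64_t vaddr = 0xFFFFFFFF80201000ULL;
        uint64_t pml4_entry = (vaddr >> 39) & 0x1ff;  /* 511 - last 512 GiB slot  */
        uint64_t pml3_entry = (vaddr >> 30) & 0x1ff;  /* 510 - -2 GiB region      */
        uint64_t pml2_entry = (vaddr >> 21) & 0x1ff;  /*   1 - second 2 MiB block */
        uint64_t pml1_entry = (vaddr >> 12) & 0x1ff;  /*   1 - second 4 KiB page  */
        (void)pml4_entry; (void)pml3_entry; (void)pml2_entry; (void)pml1_entry;
    }
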
+ trace("vmm: Mapped limine rqs: PW\n"); + for (uint64_t i = text_start; i < text_end; i += PMM_PAGE_SIZE) + vmm_map(vmm_kernel_pm, i, i - kvaddr + kpaddr, PTE_PRESENT); + trace("vmm: Mapped text: P\n"); + for (uint64_t i = rodata_start; i < rodata_end; i += PMM_PAGE_SIZE) + vmm_map(vmm_kernel_pm, i, i - kvaddr + kpaddr, PTE_PRESENT | PTE_NX); + trace("vmm: Mapped rodata: P NX\n"); + for (uint64_t i = data_start; i < data_end; i += PMM_PAGE_SIZE) + vmm_map(vmm_kernel_pm, i, i - kvaddr + kpaddr, PTE_PRESENT | PTE_WRITE | PTE_NX); + trace("vmm: Mapped data: PW NX\n"); + + // Map the lower 4 GiB into the higher-half + for (uint64_t i = 0; i < 0x100000000; i += PMM_PAGE_SIZE) + vmm_map(vmm_kernel_pm, higher_half(i), i, PTE_PRESENT | PTE_WRITE); + trace("vmm: Mapped lower 4gib to higher half with flags: PW\n"); + + cpu_load_pm(vmm_kernel_pm); + + trace("vmm: Initialized.\n"); +} + +void vmm_load_pm(pagemap_t pm) { + if (!pm) + return; + + vmm_current_pm = pm; + cpu_load_pm((pagemap_t)physical((uint64_t)pm)); +} + +pagemap_t vmm_alloc_pm() { + pagemap_t pm = (pagemap_t)higher_half((uint64_t)pmm_alloc_page()); + memset((void*)pm, 0, PMM_PAGE_SIZE); + + if (vmm_kernel_pm) + { + for (int i = 256; i < 512; i++) + pm[i] = vmm_kernel_pm[i]; + } + + return pm; +} + +void vmm_free_pm(pagemap_t pm) { + if (pm == vmm_kernel_pm) + { + warn("vmm: Who tried to free the kernel's pagemap?!\n"); + return; + } + pmm_free_page((void*)pm); +} + +static uint64_t *__vmm_get_next_lvl(uint64_t *level, uint64_t entry, + uint64_t flags, bool alloc) { + if (level[entry] & PTE_PRESENT) + return (uint64_t *)higher_half(PTE_GET_ADDR(level[entry])); + if (alloc) { + uint64_t *pml = (uint64_t *)higher_half((uint64_t)pmm_alloc_page()); + memset(pml, 0, PMM_PAGE_SIZE); + level[entry] = (uint64_t)physical((uint64_t)pml) | flags; + return pml; + } + return NULL; +} + +void vmm_map(pagemap_t pm, uint64_t vaddr, uint64_t paddr, uint64_t flags) { + if (!pm) return; + + uint64_t pml4_entry = (vaddr >> 39) & 0x1ff; + uint64_t pml3_entry = (vaddr >> 30) & 0x1ff; + uint64_t pml2_entry = (vaddr >> 21) & 0x1ff; + uint64_t pml1_entry = (vaddr >> 12) & 0x1ff; + + uint64_t *pml3 = __vmm_get_next_lvl(pm , pml4_entry, PTE_PRESENT | PTE_WRITE, true); + uint64_t *pml2 = __vmm_get_next_lvl(pml3, pml3_entry, PTE_PRESENT | PTE_WRITE, true); + uint64_t *pml1 = __vmm_get_next_lvl(pml2, pml2_entry, PTE_PRESENT | PTE_WRITE, true); + + pml1[pml1_entry] = paddr | flags; +} + +void vmm_map_user(pagemap_t pm, uint64_t vaddr, uint64_t paddr, + uint64_t flags) { + if (!pm) return; + + uint64_t pml4_entry = (vaddr >> 39) & 0x1ff; + uint64_t pml3_entry = (vaddr >> 30) & 0x1ff; + uint64_t pml2_entry = (vaddr >> 21) & 0x1ff; + uint64_t pml1_entry = (vaddr >> 12) & 0x1ff; + + uint64_t *pml3 = __vmm_get_next_lvl(pm , pml4_entry, flags, true); + uint64_t *pml2 = __vmm_get_next_lvl(pml3, pml3_entry, flags, true); + uint64_t *pml1 = __vmm_get_next_lvl(pml2, pml2_entry, flags, true); + + pml1[pml1_entry] = paddr | flags; +} + +void vmm_unmap(pagemap_t pm, uint64_t vaddr) { + if (!pm) return; + + uint64_t pml4_entry = (vaddr >> 39) & 0x1ff; + uint64_t pml3_entry = (vaddr >> 30) & 0x1ff; + uint64_t pml2_entry = (vaddr >> 21) & 0x1ff; + uint64_t pml1_entry = (vaddr >> 12) & 0x1ff; + + uint64_t *pml3 = __vmm_get_next_lvl(pm , pml4_entry, 0, false); + if (!pml3) return; + uint64_t *pml2 = __vmm_get_next_lvl(pml3, pml3_entry, 0, false); + if (!pml2) return; + uint64_t *pml1 = __vmm_get_next_lvl(pml2, pml2_entry, 0, false); + if (!pml1) return; + + pml1[pml1_entry] = 0; + 
diff --git a/kernel/src/mm/vmm.h b/kernel/src/mm/vmm.h
new file mode 100644
index 0000000..7b769c6
--- /dev/null
+++ b/kernel/src/mm/vmm.h
@@ -0,0 +1,45 @@
+/*
+ * The Soaplin Kernel
+ * Copyright (C) 2025 The SILD Project
+ *
+ * vmm.h - Virtual memory manager
+ */
+
+#pragma once
+
+// Page flags
+#include
+
+#define PTE_ADDR_MASK 0x000ffffffffff000
+#define PTE_GET_ADDR(VALUE) ((VALUE) & PTE_ADDR_MASK)
+#define PTE_GET_FLAGS(VALUE) ((VALUE) & ~PTE_ADDR_MASK)
+
+#define PTE_PRESENT (1 << 0)
+#define PTE_WRITE (1 << 1)
+#define PTE_USER (1 << 2)
+#define PTE_NX (1ULL << 63) // NX = No eXecute.
+
+typedef uint64_t *pagemap_t;
+
+// These are defined in the linker file; array form so the symbol names decay to the section boundary addresses.
+extern char reqs_start_ld[];
+extern char reqs_end_ld[];
+
+extern char text_start_ld[];
+extern char text_end_ld[];
+
+extern char rodata_start_ld[];
+extern char rodata_end_ld[];
+
+extern char data_start_ld[];
+extern char data_end_ld[];
+
+void vmm_init();
+pagemap_t vmm_alloc_pm();
+void vmm_free_pm(pagemap_t pm);
+
+void vmm_map(pagemap_t pm, uint64_t vaddr, uint64_t paddr, uint64_t flags);
+void vmm_map_user(pagemap_t pm, uint64_t vaddr, uint64_t paddr,
+                  uint64_t flags);
+void vmm_unmap(pagemap_t pm, uint64_t vaddr);
+void vmm_protect(pagemap_t pm, uint64_t vaddr, uint64_t flags);
\ No newline at end of file
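
Note on the PTE macros: each 64-bit entry packs a 4 KiB-aligned physical address together with flag bits, and PTE_GET_ADDR/PTE_GET_FLAGS split it back apart. A small illustration (the address value is made up for the example):

    #include <stdint.h>
    #include "mm/vmm.h"

    /* Example: a writable, non-executable kernel data page at physical 0x200000. */
    static void show_pte_layout(void) {
        uint64_t entry = 0x200000ULL | PTE_PRESENT | PTE_WRITE | PTE_NX;

        uint64_t addr  = PTE_GET_ADDR(entry);   /* 0x200000                   */
        uint64_t flags = PTE_GET_FLAGS(entry);  /* PRESENT | WRITE | NX bits  */

        (void)addr; (void)flags;
    }
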