From 76bec0b506388f0bb9d80ab68b833d0a214313c3 Mon Sep 17 00:00:00 2001 From: Matthias Kruk Date: Mon, 18 Nov 2019 17:25:01 +0900 Subject: [PATCH] Make several amendments to the paging code to facilitate process memory management: - Move definition of struct region to kernel-wide arch.h header - Make page directories refer to their regions through a pointer array rather than a linked list - Add pg_dir_foreach_region() function for iterating over the mappings in a page directory - Add pg_dir_map_region() function for mapping a region from one page directory into another - Add a reference counter to each region - Add flags to regions so they can be marked as private, shared, or kernel-related --- kernel/arch/paging.c | 318 ++++++++++++++++++++++++++++++++++-------- kernel/arch/paging.h | 22 +-- kernel/include/arch.h | 29 +++- 3 files changed, 290 insertions(+), 79 deletions(-) diff --git a/kernel/arch/paging.c b/kernel/arch/paging.c index 176261a..1b71822 100644 --- a/kernel/arch/paging.c +++ b/kernel/arch/paging.c @@ -57,7 +57,7 @@ static const char *_str_pg_mode[] = { "Intel64 mode" }; -static int _pg_dir_add_region(pg_dir_t*, void*, u32_t, u32_t, u32_t); +static int _pg_dir_add_region(pg_dir_t*, void*, u32_t, u32_t, u32_t, u32_t); int _pg_dir_vpxlate(pg_dir_t*, u32_t, u32_t*); int _pg_dir_pagesize(pg_dir_t*, u32_t, u32_t*); int _pg_dir_xfer(pg_dir_t*, void*, pg_dir_t*, void*, u32_t); @@ -142,8 +142,11 @@ void pg_frame_free(void *addr) static void _pg_dir_debug_regions(pg_dir_t *pd) { struct region *reg; + int i; + + for(i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) { + reg = pd->pd_regions[i]; - for(reg = pd->pd_regions; reg; reg = reg->reg_next) { dbg_printf("Region type %02x at 0x%08x:%08x\n", reg->reg_type, reg->reg_base, reg->reg_size); } @@ -382,17 +385,22 @@ void* pg_init(struct multiboot_info *info) _kernel_pgdir.pd_base = (void*)cr3; _pg_dir_add_region(&_kernel_pgdir, TEXT_BASE, TEXT_SIZE, - REGION_TEXT, PAGE_ATTR_USER); + REGION_TEXT, PAGE_ATTR_USER, + REGION_KERNEL | REGION_SHARED); _pg_dir_add_region(&_kernel_pgdir, RODATA_BASE, RODATA_SIZE, - REGION_RODATA, PAGE_ATTR_NO_EXEC); + REGION_RODATA, PAGE_ATTR_NO_EXEC, + REGION_KERNEL | REGION_SHARED); _pg_dir_add_region(&_kernel_pgdir, DATA_BASE, DATA_SIZE, - REGION_DATA, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC); + REGION_DATA, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC, + REGION_KERNEL | REGION_SHARED); _pg_dir_add_region(&_kernel_pgdir, BSS_BASE, BSS_SIZE, - REGION_BSS, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC); + REGION_BSS, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC, + REGION_KERNEL | REGION_SHARED); /* heap region also includes allocations from _phys_alloc() */ _pg_dir_add_region(&_kernel_pgdir, &_mem_start, _mem_start - (u32_t)&_mem_start + CONFIG_KERNEL_HEAP_SIZE, - REGION_HEAP, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC); + REGION_HEAP, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC, + REGION_KERNEL | REGION_SHARED); /* mark all page frames from 0x0 to the end of the kernel heap as used */ for(i = 0; i < _mem_start + CONFIG_KERNEL_HEAP_SIZE; i += PAGE_SIZE) { @@ -414,12 +422,20 @@ void* pg_dir_get_kstack(struct pagedir *pgdir) { struct region *reg; void *ret_val; + int i; + + for(ret_val = NULL, i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) { + reg = pgdir->pd_regions[i]; - for(ret_val = NULL, reg = pgdir->pd_regions; reg; reg = reg->reg_next) { if(reg->reg_type == REGION_KSTACK) { u32_t virt; u32_t phys; + /* + * This function is supposed to return the linear address + * of the kernel stack, so we have to translate it before + * returning the 
address to the caller.
+			 */
 			virt = (u32_t)reg->reg_base;
 
 			_pg_dir_vpxlate(pgdir, virt, &phys);
@@ -435,8 +451,11 @@ void* pg_dir_get_ustack(struct pagedir *pgdir)
 {
 	struct region *reg;
 	void *ret_val;
+	int i;
+
+	for(ret_val = NULL, i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) {
+		reg = pgdir->pd_regions[i];
 
-	for(ret_val = NULL, reg = pgdir->pd_regions; reg; reg = reg->reg_next) {
 		if(reg->reg_type == REGION_STACK) {
 			ret_val = reg->reg_base;
 			break;
@@ -682,7 +701,8 @@ int _pg_dir_kstack_map(struct pagedir *pgdir)
 		goto cleanup;
 	}
 
-	ret_val = _pg_dir_add_region(pgdir, vaddr, PAGE_SIZE, REGION_KSTACK, attrs);
+	ret_val = _pg_dir_add_region(pgdir, vaddr, PAGE_SIZE, REGION_KSTACK, attrs,
+				     REGION_PRIV);
 
 	if(ret_val < 0) {
 		dbg_printf("_pg_dir_add_region() failed\n");
@@ -725,7 +745,8 @@ int _pg_dir_ustack_map(struct pagedir *pgdir)
 		goto cleanup;
 	}
 
-	ret_val = _pg_dir_add_region(pgdir, vaddr, PAGE_SIZE, REGION_STACK, attrs);
+	ret_val = _pg_dir_add_region(pgdir, vaddr, PAGE_SIZE, REGION_STACK, attrs,
+				     REGION_PRIV);
 
 	if(ret_val < 0) {
 		goto cleanup;
@@ -747,6 +768,108 @@ cleanup:
 	return(ret_val);
 }
 
+int _clone_kernel_region(pg_dir_t *kdir, region_t *reg, void *data)
+{
+	pg_dir_t *dir;
+	u32_t attrs;
+	int ret_val;
+
+	ret_val = 0;
+	dir = (pg_dir_t*)data;
+
+	switch(reg->reg_type) {
+	case REGION_TEXT:
+	case REGION_RODATA:
+	case REGION_HEAP:
+		/*
+		 * The kernel's .text and .rodata regions are mapped directly into the page
+		 * directory since they cannot be modified or removed anyway.
+		 */
+	case REGION_BSS:
+	case REGION_DATA:
+		/*
+		 * The way the interrupt handling is currently implemented, accesses to the
+		 * _cpu structure and _kernel_cr3 are made within the context of (i.e. using
+		 * the page directory of) the user-mode process. For that reason the .bss
+		 * and .data sections also have to be mapped into the address space of the
+		 * user-space process. See the FIXME below.
+		 */
+
+		/*
+		 * FIXME: Only map the kernel .text section into the process's page dir
+		 *
+		 * When an interrupt occurs, the processor will push the current context
+		 * onto the process's kernel-mode stack. The interrupt handling code is
+		 * located in the kernel's .text region, which is why we can't get around
+		 * mapping at least that into the process's page directory. However, the
+		 * interrupt handler will switch to the kernel's page directory immediately
+		 * after the context has been pushed onto the stack, so it's really not
+		 * necessary for the process to have even read access to the kernel's
+		 * .rodata, heap, or anything else.
+		 */
+
+		/* make sure PAGE_ATTR_USER is set and PAGE_ATTR_WRITABLE is not set */
+		attrs = (reg->reg_attrs | PAGE_ATTR_USER) & ~PAGE_ATTR_WRITABLE;
+
+#if FEATURE(DEBUG)
+		dbg_printf("Mapping region %02x at 0x%08x:%08x (ATTR=%x)\n",
+			   reg->reg_type, reg->reg_base, reg->reg_size, attrs);
+#endif /* FEATURE(DEBUG) */
+
+		ret_val = pg_dir_map(dir, reg->reg_base, reg->reg_base,
+				     reg->reg_size, attrs);
+
+		if(ret_val >= 0) {
+			ret_val = _pg_dir_add_region(dir, reg->reg_base, reg->reg_size,
+						     reg->reg_type, attrs,
+						     REGION_KERNEL | REGION_SHARED);
+		}
+
+		break;
+#if 0
+	case REGION_BSS:
+		/*
+		 * The .bss section contains the _kernel_cr3 symbol, which is necessary
+		 * for the interrupt handling code to be able to switch to the kernel
+		 * page directory. Alternatively, interrupt handlers could also turn
+		 * off paging to access _kernel_cr3, though...
+		 */
+	case REGION_DATA:
+		/*
+		 * FIXME: Duplicate the parent's .bss and .data, not the kernel's
+		 *
+		 * The kernel's .bss and .data sections only have to be present for
+		 * processes that have been created by means of fork() + execve()
+		 * (or in other words drivers and system processes), but not for
+		 * any other processes.
+		 */
+
+		/* these regions are private to the process, so they may be writable */
+		attrs = reg->reg_attrs | PAGE_ATTR_USER | PAGE_ATTR_WRITABLE;
+
+		/* allocate new pages to the directory */
+		ret_val = pg_dir_map(dir, 0, reg->reg_base, reg->reg_size, attrs);
+
+		if(ret_val >= 0) {
+			ret_val = _pg_dir_add_region(dir, reg->reg_base, reg->reg_size,
+						     reg->reg_type, attrs, reg->reg_flags);
+
+			if(ret_val >= 0) {
+				/* copy the contents of the pages */
+				pg_dir_memcpy(dir, reg->reg_base, &_kernel_pgdir, reg->reg_base,
+					      reg->reg_size);
+			}
+		}
+
+		break;
+#endif /* 0 */
+	default:
+		break;
+	}
+
+	return(ret_val);
+}
+
 int pg_dir_create(pg_dir_t **dst)
 {
 	int ret_val;
@@ -756,8 +879,6 @@ int pg_dir_create(pg_dir_t **dst)
 	ret_val = -ENOMEM;
 
 	if(dir) {
-		struct region *reg;
-
 		switch(_pg_flags & PG_MODE_MASK) {
 		case PG_MODE_LEGACY:
 		case PG_MODE_PAE:
@@ -785,38 +906,13 @@
 		 * We should probably cleanly separate the two, to avoid design flaws
 		 * with speculative execution behavior of certain processors
 		 */
-		for(reg = _kernel_pgdir.pd_regions; reg; reg = reg->reg_next) {
-			u32_t attrs;
-
-			switch(reg->reg_type) {
-			case REGION_TEXT:
-			case REGION_BSS:
-			case REGION_DATA:
-			case REGION_RODATA:
-			case REGION_HEAP:
-				/* make sure PAGE_ATTR_USER is set and PAGE_ATTR_WRITABLE is not set */
-				attrs = (reg->reg_attrs | PAGE_ATTR_USER) & ~PAGE_ATTR_WRITABLE;
-
-				dbg_printf("Mapping region %02x at 0x%08x:%08x (ATTR=%x)\n",
-					   reg->reg_type, reg->reg_base, reg->reg_size, attrs);
-
-				ret_val = pg_dir_map(dir, reg->reg_base, reg->reg_base,
-						     reg->reg_size, attrs);
-
-				if(ret_val >= 0) {
-					ret_val = _pg_dir_add_region(dir, reg->reg_base, reg->reg_size,
-								     reg->reg_type, attrs);
-				}
-
-				break;
-
-			default:
-				break;
-			}
-		}
+		pg_dir_foreach_region(&_kernel_pgdir, _clone_kernel_region, dir);
 
+#if 0
 		/* map the vesa memory into the pagedir */
-		pg_dir_map(dir, (void*)0xb8000, (void*)0xb8000, 0x2000, PAGE_ATTR_PRESENT | PAGE_ATTR_WRITABLE);
+		pg_dir_map(dir, (void*)0xb8000, (void*)0xb8000, 0x2000,
+			   PAGE_ATTR_PRESENT | PAGE_ATTR_WRITABLE);
+#endif /* 0 */
 
 		/* allocate the kernel stack */
 		ret_val = _pg_dir_kstack_map(dir);
@@ -862,9 +958,11 @@ static int _pg_dir_map_legacy(page_table_t *pd, u32_t paddr, u32_t vaddr,
 		/*
 		 * Allocate a large page if we're at a large page boundary
 		 * and we're supposed to allocate at least the size of a large page
+		 * and if we're mapping already allocated pages (paddr != NULL)
 		 */
-		if(size >= PAGE_SIZE_LARGE && ALIGNED(vaddr, PAGE_SIZE_LARGE)) {
-			pd->pt_entries[pde] = vaddr | PAGE_ATTR_SIZE | PAGE_ATTR_PRESENT | flags;
+		if(size >= PAGE_SIZE_LARGE && ALIGNED(vaddr, PAGE_SIZE_LARGE) &&
+		   paddr && ALIGNED(paddr, PAGE_SIZE_LARGE)) {
+			pd->pt_entries[pde] = paddr | PAGE_ATTR_SIZE | PAGE_ATTR_PRESENT | flags;
 
 			paddr += PAGE_SIZE_LARGE;
 			vaddr += PAGE_SIZE_LARGE;
@@ -902,8 +1000,10 @@
 			/* allocate new frames if caller didn't specify a physical address */
 			if(!paddr) {
 				pt->pt_entries[pte] = (u32_t)pg_frame_alloc_end();
+				dbg_printf("Allocating page: %04u - 0x%08x\n", pte, pt->pt_entries[pte]);
 			} else {
 				pt->pt_entries[pte] = paddr;
+				paddr += PAGE_SIZE;
 			}
 
 			/* return -ENOMEM if we
couldn't allocate a frame */
@@ -916,7 +1016,6 @@ static int _pg_dir_map_legacy(page_table_t *pd, u32_t paddr, u32_t vaddr,
 			pt->pt_entries[pte] |= PAGE_ATTR_PRESENT | flags;
 
 			vaddr += PAGE_SIZE;
-			paddr += PAGE_SIZE;
 			size -= PAGE_SIZE;
 		}
 
@@ -1015,33 +1114,136 @@ int pg_dir_map(pg_dir_t *pd, const void *phys, const void *virt,
 	return(ret_val);
 }
 
+int pg_dir_map_region(pg_dir_t *dpd, pg_dir_t *spd, region_t *reg)
+{
+	int ret_val;
+	u32_t vaddr;
+	int idx;
+
+	ret_val = -EFAULT;
+
+	/*
+	 * First of all, check if the destination page dir has an empty slot
+	 * that we can use to reference the region.
+	 */
+	for(idx = 0; idx < CONFIG_PAGING_DIR_MAXREGIONS; idx++) {
+		if(!dpd->pd_regions[idx]) {
+			break;
+		}
+	}
+
+	if(idx >= CONFIG_PAGING_DIR_MAXREGIONS) {
+		ret_val = -ERANGE;
+		goto gtfo;
+	}
+
+	for(vaddr = (u32_t)reg->reg_base;
+	    vaddr < ((u32_t)reg->reg_base + reg->reg_size);
+	    vaddr += reg->reg_pgsize) {
+		u32_t paddr;
+
+		/* since the pages may not be contiguous in memory, map each page separately */
+		ret_val = _pg_dir_vpxlate(spd, vaddr, &paddr);
+
+		if(ret_val < 0) {
+			break;
+		}
+
+		ret_val = pg_dir_map(dpd, (void*)paddr, (void*)vaddr,
+				     reg->reg_pgsize, reg->reg_attrs);
+
+		if(ret_val < 0) {
+			break;
+		}
+	}
+
+	if(ret_val < 0) {
+		/* undo mappings that have been added during this call */
+		pg_dir_unmap(dpd, reg->reg_base, reg->reg_size);
+	} else {
+		/* mark region as shared and increase its refcount */
+		reg->reg_flags |= REGION_SHARED;
+		reg->reg_refs++;
+		dpd->pd_regions[idx] = reg;
+	}
+
+gtfo:
+	return(ret_val);
+}
+
 int pg_dir_unmap(pg_dir_t *pd, const void *base, const u32_t size)
 {
 	return(-ENOSYS);
 }
 
+int pg_dir_foreach_region(pg_dir_t *pd, int (*func)(pg_dir_t*, region_t*, void*), void *data)
+{
+	int ret_val;
+
+	ret_val = -EINVAL;
+
+	if(pd && func) {
+		int i;
+
+		/* FIXME: Lock the pagedir */
+
+		for(i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) {
+			if(pd->pd_regions[i]) {
+				ret_val = func(pd, pd->pd_regions[i], data);
+
+				if(ret_val < 0) {
+					break;
+				}
+			}
+		}
+
+		/* FIXME: Unlock the pagedir */
+	}
+
+	return(ret_val);
+}
+
 static int _pg_dir_add_region(pg_dir_t *pd, void *base, u32_t size,
-			      u32_t type, u32_t attrs)
+			      u32_t type, u32_t attrs, u32_t flags)
 {
 	struct region *reg;
 	int ret_val;
+	int i;
 
-	ret_val = -ENOMEM;
-	reg = kmalloc(sizeof(*reg));
+	ret_val = -EFAULT;
 
-	if(reg) {
-		reg->reg_base = base;
-		reg->reg_size = size;
-		reg->reg_pgsize = PAGE_SIZE;
-		reg->reg_type = type;
-		reg->reg_attrs = attrs;
+	/* find a usable slot for the region */
+	for(i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) {
+		if(!pd->pd_regions[i]) {
+			break;
+		}
+	}
 
-		reg->reg_next = pd->pd_regions;
-		pd->pd_regions = reg;
+	if(i >= CONFIG_PAGING_DIR_MAXREGIONS) {
+		/* no usable slot found */
+		ret_val = -ERANGE;
+		goto gtfo;
+	}
 
-		ret_val = 0;
+	reg = kmalloc(sizeof(*reg));
+
+	if(!reg) {
+		ret_val = -ENOMEM;
+		goto gtfo;
 	}
 
+	reg->reg_base = base;
+	reg->reg_size = size;
+	reg->reg_pgsize = PAGE_SIZE;
+	reg->reg_type = type;
+	reg->reg_attrs = attrs;
+	reg->reg_flags = flags;
+	reg->reg_refs = 1;
+
+	pd->pd_regions[i] = reg;
+	ret_val = 0;
+
+gtfo:
 	return(ret_val);
 }
 
diff --git a/kernel/arch/paging.h b/kernel/arch/paging.h
index cb0fb6b..4096870 100644
--- a/kernel/arch/paging.h
+++ b/kernel/arch/paging.h
@@ -20,34 +20,16 @@
 #define __PAGING_H
 
 #include
+#include
 
 #include "defs.h"
 
-#define REGION_TEXT 0
-#define REGION_HEAP 1
-#define REGION_STACK 2
-#define REGION_BSS 3
-#define REGION_DATA 4
-#define REGION_RODATA 5
-#define REGION_KSTACK 6
-
 #define FLAG_SHARED (1 << 0)
 
-struct region {
- struct region *reg_next; - - void *reg_base; - u32_t reg_type; - u32_t reg_size; - u32_t reg_pgsize; - u32_t reg_attrs; - u32_t reg_flags; -}; - struct pagedir { void *pd_base; u32_t pd_flags; - struct region *pd_regions; + struct region *pd_regions[CONFIG_PAGING_DIR_MAXREGIONS]; }; typedef struct pdpt pdpt_t; diff --git a/kernel/include/arch.h b/kernel/include/arch.h index ed9b2b9..7ac705d 100644 --- a/kernel/include/arch.h +++ b/kernel/include/arch.h @@ -70,6 +70,30 @@ struct task { pid_t t_pid; } __attribute__((packed)); +#define REGION_TEXT 0 +#define REGION_BSS 1 +#define REGION_DATA 2 +#define REGION_RODATA 3 +#define REGION_KSTACK 4 +#define REGION_STACK 5 +#define REGION_HEAP 6 + +#define REGION_SHARED (1 << 0) +#define REGION_KERNEL (1 << 1) +#define REGION_PRIV (1 << 2) + +typedef struct region region_t; + +struct region { + void *reg_base; + u32_t reg_type; + u32_t reg_size; + u32_t reg_pgsize; + u32_t reg_attrs; + u32_t reg_flags; + u32_t reg_refs; +}; + int cpu_get_id(void); u64_t cpu_get_capabilities(void); u64_t cpu_timestamp(void); @@ -114,13 +138,16 @@ void task_move_stack(void*, void*, u32_t); typedef struct pagedir pg_dir_t; int pg_dir_create(pg_dir_t**); + int pg_dir_map(pg_dir_t*, const void*, const void*, const u32_t, const u32_t); +int pg_dir_map_region(pg_dir_t*, pg_dir_t*, region_t*); int pg_dir_unmap(pg_dir_t*, const void*, const u32_t); + void* pg_dir_get_pdbr(pg_dir_t*); int _pg_dir_vpxlate(pg_dir_t*, u32_t, u32_t*); void* pg_dir_get_kstack(pg_dir_t*); void* pg_dir_get_ustack(pg_dir_t*); - +int pg_dir_foreach_region(pg_dir_t*, int(*)(pg_dir_t*, struct region*, void*), void*); int pg_dir_memcpy(pg_dir_t*, void*, pg_dir_t*, void*, u32_t); #endif /* !__ASSEMBLY_SOURCE */ -- 2.47.3
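
Usage sketch (not part of the patch): the intent behind pg_dir_foreach_region() and
pg_dir_map_region() is that a fork-style path can walk a parent's region table and
share each eligible region with a child directory, letting reg_refs and REGION_SHARED
track ownership. The snippet below is a minimal illustration that assumes only the
declarations from kernel/include/arch.h above; the names _share_one_region() and
pgdir_clone_for_fork() are hypothetical and do not exist in this series.

static int _share_one_region(pg_dir_t *parent, region_t *reg, void *data)
{
	pg_dir_t *child;

	child = (pg_dir_t*)data;

	/*
	 * Kernel regions are already mapped by pg_dir_create(), and private
	 * regions (e.g. kernel stacks) would have to be copied rather than
	 * shared, so only plain user regions are handed to pg_dir_map_region().
	 */
	if(reg->reg_flags & (REGION_KERNEL | REGION_PRIV)) {
		return(0);
	}

	/* on success this bumps reg->reg_refs and marks the region REGION_SHARED */
	return(pg_dir_map_region(child, parent, reg));
}

int pgdir_clone_for_fork(pg_dir_t *parent, pg_dir_t **child)
{
	int ret_val;

	ret_val = pg_dir_create(child);

	if(ret_val < 0) {
		return(ret_val);
	}

	/* stops at the first callback that returns a negative value */
	return(pg_dir_foreach_region(parent, _share_one_region, *child));
}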