git.corax.cc Git - corax/commitdiff
Make several amendments to the paging code to facilitate process memory management:
author    Matthias Kruk <m@m10k.eu>
          Mon, 18 Nov 2019 08:25:01 +0000 (17:25 +0900)
committer Matthias Kruk <m@m10k.eu>
          Mon, 18 Nov 2019 08:25:01 +0000 (17:25 +0900)
 - Move definition of struct region to kernel-wide arch.h header
 - Make page directories refer to their regions through a pointer array rather than a linked list
 - Add pg_dir_foreach_region() function for iterating over the regions in a page directory
 - Add pg_dir_map_region() function for mapping a region from one page directory into another (usage sketch below)
 - Add a reference counter to each region
 - Add flags to regions so they can be marked as private, shared, or kernel-related
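
As a rough illustration of how the new entry points fit together, the sketch below sets up a child page directory that shares its parent's shareable regions. Only pg_dir_create(), pg_dir_foreach_region(), pg_dir_map_region() and the REGION_* flags are taken from this diff; _share_region() and _address_space_share() are hypothetical names used for the example.

static int _share_region(pg_dir_t *src, region_t *reg, void *data)
{
        pg_dir_t *dst;

        dst = (pg_dir_t*)data;

        /*
         * Kernel regions are already mapped by pg_dir_create(), and regions
         * without REGION_SHARED stay private to the source directory.
         */
        if((reg->reg_flags & REGION_KERNEL) ||
           !(reg->reg_flags & REGION_SHARED)) {
                return(0);
        }

        /* map the region into dst; this also increments reg->reg_refs */
        return(pg_dir_map_region(dst, src, reg));
}

int _address_space_share(pg_dir_t *parent, pg_dir_t **child)
{
        int ret_val;

        ret_val = pg_dir_create(child);

        if(ret_val >= 0) {
                ret_val = pg_dir_foreach_region(parent, _share_region, *child);
        }

        return(ret_val);
}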

kernel/arch/paging.c
kernel/arch/paging.h
kernel/include/arch.h

kernel/arch/paging.c
index 176261a2bd792f277bef648338552e539d424ff9..1b71822b5a155b8654f5697ca5b2d05cc96ce171 100644
@@ -57,7 +57,7 @@ static const char *_str_pg_mode[] = {
     "Intel64 mode"
 };
 
-static int _pg_dir_add_region(pg_dir_t*, void*, u32_t, u32_t, u32_t);
+static int _pg_dir_add_region(pg_dir_t*, void*, u32_t, u32_t, u32_t, u32_t);
 int _pg_dir_vpxlate(pg_dir_t*, u32_t, u32_t*);
 int _pg_dir_pagesize(pg_dir_t*, u32_t, u32_t*);
 int _pg_dir_xfer(pg_dir_t*, void*, pg_dir_t*, void*, u32_t);
@@ -142,8 +142,11 @@ void pg_frame_free(void *addr)
 static void _pg_dir_debug_regions(pg_dir_t *pd)
 {
        struct region *reg;
+       int i;
+
+       for(i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) {
+               reg = pd->pd_regions[i];
 
-       for(reg = pd->pd_regions; reg; reg = reg->reg_next) {
                dbg_printf("Region type %02x at 0x%08x:%08x\n",
                                   reg->reg_type, reg->reg_base, reg->reg_size);
        }
@@ -382,17 +385,22 @@ void* pg_init(struct multiboot_info *info)
        _kernel_pgdir.pd_base = (void*)cr3;
 
        _pg_dir_add_region(&_kernel_pgdir, TEXT_BASE, TEXT_SIZE,
-                                          REGION_TEXT, PAGE_ATTR_USER);
+                                          REGION_TEXT, PAGE_ATTR_USER,
+                                          REGION_KERNEL | REGION_SHARED);
        _pg_dir_add_region(&_kernel_pgdir, RODATA_BASE, RODATA_SIZE,
-                                          REGION_RODATA, PAGE_ATTR_NO_EXEC);
+                                          REGION_RODATA, PAGE_ATTR_NO_EXEC,
+                                          REGION_KERNEL | REGION_SHARED);
        _pg_dir_add_region(&_kernel_pgdir, DATA_BASE, DATA_SIZE,
-                                          REGION_DATA, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC);
+                                          REGION_DATA, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC,
+                                          REGION_KERNEL | REGION_SHARED);
        _pg_dir_add_region(&_kernel_pgdir, BSS_BASE, BSS_SIZE,
-                                          REGION_BSS, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC);
+                                          REGION_BSS, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC,
+                                          REGION_KERNEL | REGION_SHARED);
        /* heap region also includes allocations from _phys_alloc() */
        _pg_dir_add_region(&_kernel_pgdir, &_mem_start,
                                           _mem_start - (u32_t)&_mem_start + CONFIG_KERNEL_HEAP_SIZE,
-                                          REGION_HEAP, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC);
+                                          REGION_HEAP, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC,
+                                          REGION_KERNEL | REGION_SHARED);
 
     /* mark all page frames from 0x0 to the end of the kernel heap as used */
     for(i = 0; i < _mem_start + CONFIG_KERNEL_HEAP_SIZE; i += PAGE_SIZE) {
@@ -414,12 +422,20 @@ void* pg_dir_get_kstack(struct pagedir *pgdir)
 {
        struct region *reg;
        void *ret_val;
+       int i;
+
+       for(ret_val = NULL, i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) {
+               reg = pgdir->pd_regions[i];
 
-       for(ret_val = NULL, reg = pgdir->pd_regions; reg; reg = reg->reg_next) {
                if(reg->reg_type == REGION_KSTACK) {
                        u32_t virt;
                        u32_t phys;
 
+                       /*
+                        * This function is supposed to return the linear address
+                        * of the kernel stack, so we have to translate it before
+                        * returning the address to the caller.
+                        */
                        virt = (u32_t)reg->reg_base;
                        _pg_dir_vpxlate(pgdir, virt, &phys);
 
@@ -435,8 +451,11 @@ void* pg_dir_get_ustack(struct pagedir *pgdir)
 {
        struct region *reg;
        void *ret_val;
+       int i;
+
+       for(ret_val = NULL, i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) {
+               reg = pgdir->pd_regions[i];
 
-       for(ret_val = NULL, reg = pgdir->pd_regions; reg; reg = reg->reg_next) {
                if(reg->reg_type == REGION_STACK) {
                        ret_val = reg->reg_base;
                        break;
@@ -682,7 +701,8 @@ int _pg_dir_kstack_map(struct pagedir *pgdir)
                        goto cleanup;
                }
 
-               ret_val = _pg_dir_add_region(pgdir, vaddr, PAGE_SIZE, REGION_KSTACK, attrs);
+               ret_val = _pg_dir_add_region(pgdir, vaddr, PAGE_SIZE, REGION_KSTACK, attrs,
+                                                                        REGION_PRIV);
 
                if(ret_val < 0) {
                        dbg_printf("_pg_dir_add_region() failed\n");
@@ -725,7 +745,8 @@ int _pg_dir_ustack_map(struct pagedir *pgdir)
                        goto cleanup;
                }
 
-               ret_val = _pg_dir_add_region(pgdir, vaddr, PAGE_SIZE, REGION_STACK, attrs);
+               ret_val = _pg_dir_add_region(pgdir, vaddr, PAGE_SIZE, REGION_STACK, attrs,
+                                                                        REGION_PRIV);
 
                if(ret_val < 0) {
                        goto cleanup;
@@ -747,6 +768,108 @@ cleanup:
        return(ret_val);
 }
 
+int _clone_kernel_region(pg_dir_t *kdir, region_t *reg, void *data)
+{
+       pg_dir_t *dir;
+       u32_t attrs;
+       int ret_val;
+
+       ret_val = 0;
+       dir = (pg_dir_t*)data;
+
+       switch(reg->reg_type) {
+       case REGION_TEXT:
+       case REGION_RODATA:
+       case REGION_HEAP:
+               /*
+                * The kernel's .text and .rodata regions are mapped directly into the page
+                * directory since they cannot be modified or removed anyway.
+                */
+       case REGION_BSS:
+       case REGION_DATA:
+               /*
+                * The way the interrupt handling is currently implemented, accesses to the
+                * the _cpu structure and _kernel_cr3 are made within the context of (i.e. using
+                * the page directory of) the user-mode process. For that reason the .bss
+                * and .data sections also have to be mapped into the address space of the
+                * user space process. See the FIXME below.
+                */
+
+               /*
+                * FIXME: Only map the kernel .text section into the process's page dir
+                *
+                * When an interrupt occurs, the processor will push the current context
+                * onto the process's kernel-mode stack. The interrupt handling code is
+                * located in the kernel's .text region, which is why we can't get around
+                * mapping at least that into the process's page directory. However, the
+                * interrupt handler will switch to the kernel's page directory immediately
+                * after the context has been pushed onto the stack, so it's really not
+                * necessary for the process to have even read-access to the kernel's
+                * .rodata, heap, or anything else.
+                */
+
+               /* make sure PAGE_ATTR_USER is set and PAGE_ATTR_WRITABLE is not set */
+               attrs = (reg->reg_attrs | PAGE_ATTR_USER) & ~PAGE_ATTR_WRITABLE;
+
+#if FEATURE(DEBUG)
+               dbg_printf("Mapping region %02x at 0x%08x:%08x (ATTR=%x)\n",
+                                  reg->reg_type, reg->reg_base, reg->reg_size, attrs);
+#endif /* FEATURE(DEBUG) */
+
+               ret_val = pg_dir_map(dir, reg->reg_base, reg->reg_base,
+                                                        reg->reg_size, attrs);
+
+               if(ret_val >= 0) {
+                       ret_val = _pg_dir_add_region(dir, reg->reg_base, reg->reg_size,
+                                                                                reg->reg_type, attrs,
+                                                                                REGION_KERNEL | REGION_SHARED);
+               }
+
+               break;
+#if 0
+       case REGION_BSS:
+               /*
+                * The .bss section contains the _kernel_cr3 symbol, which is necessary
+                * for the interrupt handling code to be able to switch to the kernel
+                * page directory. Alternatively, interrupt handlers could also turn
+                * off paging to access _kernel_cr3, though...
+                */
+       case REGION_DATA:
+               /*
+                * FIXME: Duplicate the parent's .bss and .data, not the kernel's
+                *
+                * The kernel's .bss and .data sections only have to be present for
+                * processes that have been created by means of fork() + execve()
+                * (or in other words drivers and system processes), but not for
+                * any other processes.
+                */
+
+               /* these regions are private to the process, so they may be writable */
+               attrs = reg->reg_attrs | PAGE_ATTR_USER | PAGE_ATTR_WRITABLE;
+
+               /* allocate new pages to the directory */
+               ret_val = pg_dir_map(dir, 0, reg->reg_base, reg->reg_size, attrs);
+
+               if(ret_val >= 0) {
+                       ret_val = _pg_dir_add_region(dir, reg->reg_base, reg->reg_size,
+                                                                                reg->reg_type, attrs, reg->reg_flags);
+
+                       if(ret_val >= 0) {
+                               /* copy the contents of the pages */
+                               pg_dir_memcpy(dir, reg->reg_base, &_kernel_pgdir, reg->reg_base,
+                                                         reg->reg_size);
+                       }
+               }
+
+               break;
+#endif /* 0 */
+       default:
+               break;
+       }
+
+       return(ret_val);
+}
+
 int pg_dir_create(pg_dir_t **dst)
 {
        int ret_val;
@@ -756,8 +879,6 @@ int pg_dir_create(pg_dir_t **dst)
        ret_val = -ENOMEM;
 
        if(dir) {
-               struct region *reg;
-
                switch(_pg_flags & PG_MODE_MASK) {
                case PG_MODE_LEGACY:
                case PG_MODE_PAE:
@@ -785,38 +906,13 @@ int pg_dir_create(pg_dir_t **dst)
                 * We should probably cleanly separate the two, to avoid design flaws
                 * with speculative execution behavior of certain processors
                 */
-               for(reg = _kernel_pgdir.pd_regions; reg; reg = reg->reg_next) {
-                       u32_t attrs;
-
-                       switch(reg->reg_type) {
-                       case REGION_TEXT:
-                       case REGION_BSS:
-                       case REGION_DATA:
-                       case REGION_RODATA:
-                       case REGION_HEAP:
-                               /* make sure PAGE_ATTR_USER is set and PAGE_ATTR_WRITABLE is not set */
-                               attrs = (reg->reg_attrs | PAGE_ATTR_USER) & ~PAGE_ATTR_WRITABLE;
-
-                               dbg_printf("Mapping region %02x at 0x%08x:%08x (ATTR=%x)\n",
-                                                  reg->reg_type, reg->reg_base, reg->reg_size, attrs);
-
-                               ret_val = pg_dir_map(dir, reg->reg_base, reg->reg_base,
-                                                                        reg->reg_size, attrs);
-
-                               if(ret_val >= 0) {
-                                       ret_val = _pg_dir_add_region(dir, reg->reg_base, reg->reg_size,
-                                                                                                reg->reg_type, attrs);
-                               }
-
-                               break;
-
-                       default:
-                               break;
-                       }
-               }
+               pg_dir_foreach_region(&_kernel_pgdir, _clone_kernel_region, dir);
 
+#if 0
                /* map the vesa memory into the pagedir */
-               pg_dir_map(dir, (void*)0xb8000, (void*)0xb8000, 0x2000, PAGE_ATTR_PRESENT | PAGE_ATTR_WRITABLE);
+               pg_dir_map(dir, (void*)0xb8000, (void*)0xb8000, 0x2000,
+                                  PAGE_ATTR_PRESENT | PAGE_ATTR_WRITABLE);
+#endif /* 0 */
 
                /* allocate the kernel stack */
                ret_val = _pg_dir_kstack_map(dir);
@@ -862,9 +958,11 @@ static int _pg_dir_map_legacy(page_table_t *pd, u32_t paddr, u32_t vaddr,
                        /*
                         * Allocate a large page if we're at a large page boundary
                         * and we're supposed to allocate at least the size of a large page
+                        * and we're mapping caller-provided frames (paddr != 0) that are large-page aligned
                         */
-                       if(size >= PAGE_SIZE_LARGE && ALIGNED(vaddr, PAGE_SIZE_LARGE)) {
-                               pd->pt_entries[pde] = vaddr | PAGE_ATTR_SIZE | PAGE_ATTR_PRESENT | flags;
+                       if(size >= PAGE_SIZE_LARGE && ALIGNED(vaddr, PAGE_SIZE_LARGE) &&
+                          paddr && ALIGNED(paddr, PAGE_SIZE_LARGE)) {
+                               pd->pt_entries[pde] = paddr | PAGE_ATTR_SIZE | PAGE_ATTR_PRESENT | flags;
 
                                paddr += PAGE_SIZE_LARGE;
                                vaddr += PAGE_SIZE_LARGE;
@@ -902,8 +1000,10 @@ static int _pg_dir_map_legacy(page_table_t *pd, u32_t paddr, u32_t vaddr,
                /* allocate new frames if caller didn't specify a physical address */
                if(!paddr) {
                        pt->pt_entries[pte] = (u32_t)pg_frame_alloc_end();
+                       dbg_printf("Allocating page: %04u - 0x%08x\n", pte, pt->pt_entries[pte]);
                } else {
                        pt->pt_entries[pte] = paddr;
+                       paddr += PAGE_SIZE;
                }
 
                /* return -ENOMEM if we couldn't allocate a frame */
@@ -916,7 +1016,6 @@ static int _pg_dir_map_legacy(page_table_t *pd, u32_t paddr, u32_t vaddr,
                pt->pt_entries[pte] |= PAGE_ATTR_PRESENT | flags;
 
                vaddr += PAGE_SIZE;
-               paddr += PAGE_SIZE;
                size -= PAGE_SIZE;
        }
 
@@ -1015,33 +1114,136 @@ int pg_dir_map(pg_dir_t *pd, const void *phys, const void *virt,
        return(ret_val);
 }
 
+int pg_dir_map_region(pg_dir_t *dpd, pg_dir_t *spd, region_t *reg)
+{
+       int ret_val;
+       u32_t vaddr;
+       int idx;
+
+       ret_val = -EFAULT;
+
+       /*
+        * First of all, check if the destination page dir has an empty slot
+        * that we can use to reference the region.
+        */
+       for(idx = 0; idx < CONFIG_PAGING_DIR_MAXREGIONS; idx++) {
+               if(!dpd->pd_regions[idx]) {
+                       break;
+               }
+       }
+
+       if(idx >= CONFIG_PAGING_DIR_MAXREGIONS) {
+               ret_val = -ERANGE;
+               goto gtfo;
+       }
+
+       for(vaddr = (u32_t)reg->reg_base;
+               vaddr < ((u32_t)reg->reg_base + reg->reg_size);
+               vaddr += reg->reg_pgsize) {
+               u32_t paddr;
+
+               /* since the pages may not be contiguous in memory, map each page separately */
+               ret_val = _pg_dir_vpxlate(spd, vaddr, &paddr);
+
+               if(ret_val < 0) {
+                       break;
+               }
+
+               ret_val = pg_dir_map(dpd, (void*)paddr, (void*)vaddr,
+                                                        reg->reg_pgsize, reg->reg_attrs);
+
+               if(ret_val < 0) {
+                       break;
+               }
+       }
+
+       if(ret_val < 0) {
+               /* undo mappings that have been added during this call */
+               pg_dir_unmap(dpd, reg->reg_base, reg->reg_size);
+       } else {
+               /* mark region as shared and increase its refcount */
+               reg->reg_flags |= REGION_SHARED;
+               reg->reg_refs++;
+               dpd->pd_regions[idx] = reg;
+       }
+
+gtfo:
+       return(ret_val);
+}
+
 int pg_dir_unmap(pg_dir_t *pd, const void *base, const u32_t size)
 {
        return(-ENOSYS);
 }
 
+int pg_dir_foreach_region(pg_dir_t *pd, int (*func)(pg_dir_t*, region_t*, void*), void *data)
+{
+       int ret_val;
+
+       ret_val = -EINVAL;
+
+       if(pd && func) {
+               int i;
+
+               /* FIXME: Lock the pagedir */
+
+               for(i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) {
+                       if(pd->pd_regions[i]) {
+                               ret_val = func(pd, pd->pd_regions[i], data);
+
+                               if(ret_val < 0) {
+                                       break;
+                               }
+                       }
+               }
+
+               /* FIXME: Unlock the pagedir */
+       }
+
+       return(ret_val);
+}
+
 static int _pg_dir_add_region(pg_dir_t *pd, void *base, u32_t size,
-                                                         u32_t type, u32_t attrs)
+                                                         u32_t type, u32_t attrs, u32_t flags)
 {
        struct region *reg;
        int ret_val;
+       int i;
 
-       ret_val = -ENOMEM;
-       reg = kmalloc(sizeof(*reg));
+       ret_val = -EFAULT;
 
-       if(reg) {
-               reg->reg_base = base;
-               reg->reg_size = size;
-               reg->reg_pgsize = PAGE_SIZE;
-               reg->reg_type = type;
-               reg->reg_attrs = attrs;
+       /* find a usable slot for the region */
+       for(i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) {
+               if(!pd->pd_regions[i]) {
+                       break;
+               }
+       }
 
-               reg->reg_next = pd->pd_regions;
-               pd->pd_regions = reg;
+       if(i >= CONFIG_PAGING_DIR_MAXREGIONS) {
+               /* no usable slot found */
+               ret_val = -ERANGE;
+               goto gtfo;
+       }
 
-               ret_val = 0;
+       reg = kmalloc(sizeof(*reg));
+
+       if(!reg) {
+               ret_val = -ENOMEM;
+               goto gtfo;
        }
 
+       reg->reg_base = base;
+       reg->reg_size = size;
+       reg->reg_pgsize = PAGE_SIZE;
+       reg->reg_type = type;
+       reg->reg_attrs = attrs;
+       reg->reg_flags = flags;
+       reg->reg_refs = 1;
+
+       pd->pd_regions[i] = reg;
+       ret_val = 0;
+
+gtfo:
        return(ret_val);
 }
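
A natural counterpart to _pg_dir_add_region() would release a slot again, using the new reference counter to decide when the region structure itself may be freed. The sketch below is not part of this commit: _pg_dir_del_region() and kfree() are assumed names (only kmalloc() appears in the diff), and pg_dir_unmap() is still an -ENOSYS stub above.

static int _pg_dir_del_region(pg_dir_t *pd, region_t *reg)
{
        int i;

        for(i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) {
                if(pd->pd_regions[i] != reg) {
                        continue;
                }

                /* drop this directory's reference to the region */
                pd->pd_regions[i] = NULL;
                pg_dir_unmap(pd, reg->reg_base, reg->reg_size);

                if(--reg->reg_refs == 0 && !(reg->reg_flags & REGION_KERNEL)) {
                        /* the last reference is gone, so the descriptor can be freed */
                        kfree(reg);
                }

                return(0);
        }

        /* the region is not referenced by this page directory */
        return(-EINVAL);
}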
 
kernel/arch/paging.h
index cb0fb6bb5c33e8039c3b096a1b810e731802cd62..409687089d904867fe4c66fb92a9584fe9ef4d00 100644
 #define __PAGING_H
 
 #include <corax/types.h>
+#include <arch.h>
 #include "defs.h"
 
-#define REGION_TEXT   0
-#define REGION_HEAP   1
-#define REGION_STACK  2
-#define REGION_BSS    3
-#define REGION_DATA   4
-#define REGION_RODATA 5
-#define REGION_KSTACK 6
-
 #define FLAG_SHARED   (1 << 0)
 
-struct region {
-       struct region *reg_next;
-
-       void *reg_base;
-       u32_t reg_type;
-       u32_t reg_size;
-       u32_t reg_pgsize;
-       u32_t reg_attrs;
-       u32_t reg_flags;
-};
-
 struct pagedir {
        void  *pd_base;
        u32_t pd_flags;
 
-       struct region *pd_regions;
+       struct region *pd_regions[CONFIG_PAGING_DIR_MAXREGIONS];
 };
 
 typedef struct pdpt pdpt_t;
kernel/include/arch.h
index ed9b2b9c5cacc874424f7fc358a9cdba96401f59..7ac705de062cbd7f63ef5b54e6bb585ad299ebd0 100644
@@ -70,6 +70,30 @@ struct task {
        pid_t t_pid;
 } __attribute__((packed));
 
+#define REGION_TEXT   0
+#define REGION_BSS    1
+#define REGION_DATA   2
+#define REGION_RODATA 3
+#define REGION_KSTACK 4
+#define REGION_STACK  5
+#define REGION_HEAP   6
+
+#define REGION_SHARED (1 << 0)
+#define REGION_KERNEL (1 << 1)
+#define REGION_PRIV   (1 << 2)
+
+typedef struct region region_t;
+
+struct region {
+       void *reg_base;
+       u32_t reg_type;
+       u32_t reg_size;
+       u32_t reg_pgsize;
+       u32_t reg_attrs;
+       u32_t reg_flags;
+       u32_t reg_refs;
+};
+
 int     cpu_get_id(void);
 u64_t   cpu_get_capabilities(void);
 u64_t   cpu_timestamp(void);
@@ -114,13 +138,16 @@ void    task_move_stack(void*, void*, u32_t);
 typedef struct pagedir pg_dir_t;
 
 int     pg_dir_create(pg_dir_t**);
+
 int     pg_dir_map(pg_dir_t*, const void*, const void*, const u32_t, const u32_t);
+int     pg_dir_map_region(pg_dir_t*, pg_dir_t*, region_t*);
 int     pg_dir_unmap(pg_dir_t*, const void*, const u32_t);
+
 void*   pg_dir_get_pdbr(pg_dir_t*);
 int     _pg_dir_vpxlate(pg_dir_t*, u32_t, u32_t*);
 void*   pg_dir_get_kstack(pg_dir_t*);
 void*   pg_dir_get_ustack(pg_dir_t*);
-
+int     pg_dir_foreach_region(pg_dir_t*, int(*)(pg_dir_t*, struct region*, void*), void*);
 int     pg_dir_memcpy(pg_dir_t*, void*, pg_dir_t*, void*, u32_t);
 
 #endif /* !__ASSEMBLY_SOURCE */
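
Since pg_dir_foreach_region() is now declared here, code outside the paging implementation can walk a directory's regions without knowing the slot layout. A minimal sketch of such a consumer, assuming dbg_printf() is visible to the caller; _dump_region() and pg_dir_dump_regions() are illustrative names only:

static int _dump_region(pg_dir_t *pgdir, struct region *reg, void *data)
{
        /* the iterator only invokes the callback for occupied slots */
        dbg_printf("region %02x at 0x%08x:%08x (flags=%x, refs=%u)\n",
                   reg->reg_type, reg->reg_base, reg->reg_size,
                   reg->reg_flags, reg->reg_refs);

        return(0);
}

void pg_dir_dump_regions(pg_dir_t *pgdir)
{
        pg_dir_foreach_region(pgdir, _dump_region, NULL);
}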