"Intel64 mode"
};
-static int _pg_dir_add_region(pg_dir_t*, void*, u32_t, u32_t, u32_t);
+static int _pg_dir_add_region(pg_dir_t*, void*, u32_t, u32_t, u32_t, u32_t);
int _pg_dir_vpxlate(pg_dir_t*, u32_t, u32_t*);
int _pg_dir_pagesize(pg_dir_t*, u32_t, u32_t*);
int _pg_dir_xfer(pg_dir_t*, void*, pg_dir_t*, void*, u32_t);
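+/* dump all regions of the given page directory to the debug console */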
static void _pg_dir_debug_regions(pg_dir_t *pd)
{
struct region *reg;
+ int i;
+
+ for(i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) {
+ reg = pd->pd_regions[i];
+
+ /* skip empty region slots */
+ if(!reg) {
+ continue;
+ }
- for(reg = pd->pd_regions; reg; reg = reg->reg_next) {
dbg_printf("Region type %02x at 0x%08x:%08x\n",
reg->reg_type, reg->reg_base, reg->reg_size);
}
_kernel_pgdir.pd_base = (void*)cr3;
_pg_dir_add_region(&_kernel_pgdir, TEXT_BASE, TEXT_SIZE,
- REGION_TEXT, PAGE_ATTR_USER);
+ REGION_TEXT, PAGE_ATTR_USER,
+ REGION_KERNEL | REGION_SHARED);
_pg_dir_add_region(&_kernel_pgdir, RODATA_BASE, RODATA_SIZE,
- REGION_RODATA, PAGE_ATTR_NO_EXEC);
+ REGION_RODATA, PAGE_ATTR_NO_EXEC,
+ REGION_KERNEL | REGION_SHARED);
_pg_dir_add_region(&_kernel_pgdir, DATA_BASE, DATA_SIZE,
- REGION_DATA, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC);
+ REGION_DATA, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC,
+ REGION_KERNEL | REGION_SHARED);
_pg_dir_add_region(&_kernel_pgdir, BSS_BASE, BSS_SIZE,
- REGION_BSS, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC);
+ REGION_BSS, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC,
+ REGION_KERNEL | REGION_SHARED);
/* heap region also includes allocations from _phys_alloc() */
_pg_dir_add_region(&_kernel_pgdir, &_mem_start,
_mem_start - (u32_t)&_mem_start + CONFIG_KERNEL_HEAP_SIZE,
- REGION_HEAP, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC);
+ REGION_HEAP, PAGE_ATTR_WRITABLE | PAGE_ATTR_NO_EXEC,
+ REGION_KERNEL | REGION_SHARED);
/* mark all page frames from 0x0 to the end of the kernel heap as used */
for(i = 0; i < _mem_start + CONFIG_KERNEL_HEAP_SIZE; i += PAGE_SIZE) {
{
struct region *reg;
void *ret_val;
+ int i;
+
+ for(ret_val = NULL, i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) {
+ reg = pgdir->pd_regions[i];
+
+ /* skip empty region slots */
+ if(!reg) {
+ continue;
+ }
- for(ret_val = NULL, reg = pgdir->pd_regions; reg; reg = reg->reg_next) {
if(reg->reg_type == REGION_KSTACK) {
u32_t virt;
u32_t phys;
+ /*
+ * This function is supposed to return the physical address
+ * of the kernel stack, so we have to translate the region's
+ * virtual base address before returning it to the caller.
+ */
virt = (u32_t)reg->reg_base;
_pg_dir_vpxlate(pgdir, virt, &phys);
{
struct region *reg;
void *ret_val;
+ int i;
+
+ for(ret_val = NULL, i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) {
+ reg = pgdir->pd_regions[i];
+
+ /* skip empty region slots */
+ if(!reg) {
+ continue;
+ }
- for(ret_val = NULL, reg = pgdir->pd_regions; reg; reg = reg->reg_next) {
if(reg->reg_type == REGION_STACK) {
ret_val = reg->reg_base;
break;
goto cleanup;
}
- ret_val = _pg_dir_add_region(pgdir, vaddr, PAGE_SIZE, REGION_KSTACK, attrs);
+ ret_val = _pg_dir_add_region(pgdir, vaddr, PAGE_SIZE, REGION_KSTACK, attrs,
+ REGION_PRIV);
if(ret_val < 0) {
dbg_printf("_pg_dir_add_region() failed\n");
goto cleanup;
}
- ret_val = _pg_dir_add_region(pgdir, vaddr, PAGE_SIZE, REGION_STACK, attrs);
+ ret_val = _pg_dir_add_region(pgdir, vaddr, PAGE_SIZE, REGION_STACK, attrs,
+ REGION_PRIV);
if(ret_val < 0) {
goto cleanup;
return(ret_val);
}
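+/*
+ * pg_dir_foreach_region() callback used by pg_dir_create(): maps one of
+ * the kernel's shared regions into the page directory passed via data.
+ */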
+int _clone_kernel_region(pg_dir_t *kdir, region_t *reg, void *data)
+{
+ pg_dir_t *dir;
+ u32_t attrs;
+ int ret_val;
+
+ ret_val = 0;
+ dir = (pg_dir_t*)data;
+
+ switch(reg->reg_type) {
+ case REGION_TEXT:
+ case REGION_RODATA:
+ case REGION_HEAP:
+ /*
+ * The kernel's .text and .rodata regions are mapped directly into the page
+ * directory since they cannot be modified or removed anyway.
+ */
+ case REGION_BSS:
+ case REGION_DATA:
+ /*
+ * The way the interrupt handling is currently implemented, accesses to the
+ * _cpu structure and _kernel_cr3 are made within the context of (i.e.
+ * using the page directory of) the user-mode process. For that reason,
+ * the .bss and .data sections also have to be mapped into the address
+ * space of the user-space process. See the FIXME below.
+ */
+
+ /*
+ * FIXME: Only map the kernel .text section into the process's page dir
+ *
+ * When an interrupt occurs, the processor will push the current context
+ * onto the process's kernel-mode stack. The interrupt handling code is
+ * located in the kernel's .text region, which is why we can't get around
+ * mapping at least that into the process's page directory. However, the
+ * interrupt handler will switch to the kernel's page directory immediately
+ * after the context has been pushed onto the stack, so it's really not
+ * necessary for the process to have even read-access to the kernel's
+ * .rodata, heap, or anything else.
+ */
+
+ /* make sure PAGE_ATTR_USER is set and PAGE_ATTR_WRITABLE is not set */
+ attrs = (reg->reg_attrs | PAGE_ATTR_USER) & ~PAGE_ATTR_WRITABLE;
+
+#if FEATURE(DEBUG)
+ dbg_printf("Mapping region %02x at 0x%08x:%08x (ATTR=%x)\n",
+ reg->reg_type, reg->reg_base, reg->reg_size, attrs);
+#endif /* FEATURE(DEBUG) */
+
+ ret_val = pg_dir_map(dir, reg->reg_base, reg->reg_base,
+ reg->reg_size, attrs);
+
+ if(ret_val >= 0) {
+ ret_val = _pg_dir_add_region(dir, reg->reg_base, reg->reg_size,
+ reg->reg_type, attrs,
+ REGION_KERNEL | REGION_SHARED);
+ }
+
+ break;
+#if 0
+ case REGION_BSS:
+ /*
+ * The .bss section contains the _kernel_cr3 symbol, which is necessary
+ * for the interrupt handling code to be able to switch to the kernel
+ * page directory. Alternatively, interrupt handlers could also turn
+ * off paging to access _kernel_cr3, though...
+ */
+ case REGION_DATA:
+ /*
+ * FIXME: Duplicate the parent's .bss and .data, not the kernel's
+ *
+ * The kernel's .bss and .data sections only have to be present for
+ * processes that have been created by means of fork() + execve()
+ * (or in other words drivers and system processes), but not for
+ * any other processes.
+ */
+
+ /* these regions are private to the process, so they may be writable */
+ attrs = reg->reg_attrs | PAGE_ATTR_USER | PAGE_ATTR_WRITABLE;
+
+ /* allocate new pages to the directory */
+ ret_val = pg_dir_map(dir, 0, reg->reg_base, reg->reg_size, attrs);
+
+ if(ret_val >= 0) {
+ ret_val = _pg_dir_add_region(dir, reg->reg_base, reg->reg_size,
+ reg->reg_type, attrs, reg->reg_flags);
+
+ if(ret_val >= 0) {
+ /* copy the contents of the pages */
+ pg_dir_memcpy(dir, reg->reg_base, &_kernel_pgdir, reg->reg_base,
+ reg->reg_size);
+ }
+ }
+
+ break;
+#endif /* 0 */
+ default:
+ break;
+ }
+
+ return(ret_val);
+}
+
int pg_dir_create(pg_dir_t **dst)
{
int ret_val;
ret_val = -ENOMEM;
if(dir) {
- struct region *reg;
-
switch(_pg_flags & PG_MODE_MASK) {
case PG_MODE_LEGACY:
case PG_MODE_PAE:
* We should probably cleanly separate the two, to avoid design flaws
* with speculative execution behavior of certain processors
*/
- for(reg = _kernel_pgdir.pd_regions; reg; reg = reg->reg_next) {
- u32_t attrs;
-
- switch(reg->reg_type) {
- case REGION_TEXT:
- case REGION_BSS:
- case REGION_DATA:
- case REGION_RODATA:
- case REGION_HEAP:
- /* make sure PAGE_ATTR_USER is set and PAGE_ATTR_WRITABLE is not set */
- attrs = (reg->reg_attrs | PAGE_ATTR_USER) & ~PAGE_ATTR_WRITABLE;
-
- dbg_printf("Mapping region %02x at 0x%08x:%08x (ATTR=%x)\n",
- reg->reg_type, reg->reg_base, reg->reg_size, attrs);
-
- ret_val = pg_dir_map(dir, reg->reg_base, reg->reg_base,
- reg->reg_size, attrs);
-
- if(ret_val >= 0) {
- ret_val = _pg_dir_add_region(dir, reg->reg_base, reg->reg_size,
- reg->reg_type, attrs);
- }
-
- break;
-
- default:
- break;
- }
- }
+ pg_dir_foreach_region(&_kernel_pgdir, _clone_kernel_region, dir);
+#if 0
/* map the vesa memory into the pagedir */
- pg_dir_map(dir, (void*)0xb8000, (void*)0xb8000, 0x2000, PAGE_ATTR_PRESENT | PAGE_ATTR_WRITABLE);
+ pg_dir_map(dir, (void*)0xb8000, (void*)0xb8000, 0x2000,
+ PAGE_ATTR_PRESENT | PAGE_ATTR_WRITABLE);
+#endif /* 0 */
/* allocate the kernel stack */
ret_val = _pg_dir_kstack_map(dir);
/*
* Allocate a large page if we're at a large page boundary
* and we're supposed to allocate at least the size of a large page
+ * and we're mapping already-allocated, large-page-aligned frames
+ * (paddr != NULL)
*/
- if(size >= PAGE_SIZE_LARGE && ALIGNED(vaddr, PAGE_SIZE_LARGE)) {
- pd->pt_entries[pde] = vaddr | PAGE_ATTR_SIZE | PAGE_ATTR_PRESENT | flags;
+ if(size >= PAGE_SIZE_LARGE && ALIGNED(vaddr, PAGE_SIZE_LARGE) &&
+ paddr && ALIGNED(paddr, PAGE_SIZE_LARGE)) {
+ pd->pt_entries[pde] = paddr | PAGE_ATTR_SIZE | PAGE_ATTR_PRESENT | flags;
paddr += PAGE_SIZE_LARGE;
vaddr += PAGE_SIZE_LARGE;
/* allocate new frames if caller didn't specify a physical address */
if(!paddr) {
pt->pt_entries[pte] = (u32_t)pg_frame_alloc_end();
+ dbg_printf("Allocating page: %04u - 0x%08x\n", pte, pt->pt_entries[pte]);
} else {
pt->pt_entries[pte] = paddr;
+ paddr += PAGE_SIZE;
}
/* return -ENOMEM if we couldn't allocate a frame */
pt->pt_entries[pte] |= PAGE_ATTR_PRESENT | flags;
vaddr += PAGE_SIZE;
- paddr += PAGE_SIZE;
size -= PAGE_SIZE;
}
return(ret_val);
}
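+/*
+ * Map a region that already exists in the source page directory spd into
+ * the destination page directory dpd, translating each page individually,
+ * and record the shared region in a free slot of dpd's region table.
+ */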
+int pg_dir_map_region(pg_dir_t *dpd, pg_dir_t *spd, region_t *reg)
+{
+ int ret_val;
+ u32_t vaddr;
+ int idx;
+
+ ret_val = -EFAULT;
+
+ /*
+ * First of all, check if the destination page dir has an empty slot
+ * that we can use to reference the region.
+ */
+ for(idx = 0; idx < CONFIG_PAGING_DIR_MAXREGIONS; idx++) {
+ if(!dpd->pd_regions[idx]) {
+ break;
+ }
+ }
+
+ if(idx >= CONFIG_PAGING_DIR_MAXREGIONS) {
+ ret_val = -ERANGE;
+ goto gtfo;
+ }
+
+ for(vaddr = (u32_t)reg->reg_base;
+ vaddr < ((u32_t)reg->reg_base + reg->reg_size);
+ vaddr += reg->reg_pgsize) {
+ u32_t paddr;
+
+ /* since the pages may not be contiguous in physical memory, map each page separately */
+ ret_val = _pg_dir_vpxlate(spd, vaddr, &paddr);
+
+ if(ret_val < 0) {
+ break;
+ }
+
+ ret_val = pg_dir_map(dpd, (void*)paddr, (void*)vaddr,
+ reg->reg_pgsize, reg->reg_attrs);
+
+ if(ret_val < 0) {
+ break;
+ }
+ }
+
+ if(ret_val < 0) {
+ /* undo mappings that have been added during this call */
+ pg_dir_unmap(dpd, reg->reg_base, reg->reg_size);
+ } else {
+ /* mark region as shared and increase its refcount */
+ reg->reg_flags |= REGION_SHARED;
+ reg->reg_refs++;
+ dpd->pd_regions[idx] = reg;
+ }
+
+gtfo:
+ return(ret_val);
+}
+
int pg_dir_unmap(pg_dir_t *pd, const void *base, const u32_t size)
{
return(-ENOSYS);
}
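+/*
+ * Invoke func for each occupied region slot of pd, passing data through
+ * to the callback. Iteration stops early if the callback returns a
+ * negative value, which is then passed on to the caller.
+ */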
+int pg_dir_foreach_region(pg_dir_t *pd, int (*func)(pg_dir_t*, region_t*, void*), void *data)
+{
+ int ret_val;
+
+ ret_val = -EINVAL;
+
+ if(pd && func) {
+ int i;
+
+ /* succeed even if the directory contains no regions */
+ ret_val = 0;
+
+ /* FIXME: Lock the pagedir */
+
+ for(i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) {
+ if(pd->pd_regions[i]) {
+ ret_val = func(pd, pd->pd_regions[i], data);
+
+ if(ret_val < 0) {
+ break;
+ }
+ }
+ }
+
+ /* FIXME: Unlock the pagedir */
+ }
+
+ return(ret_val);
+}
+
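+/*
+ * Record a region in the first free slot of pd's region table. Returns
+ * -ERANGE if all slots are in use and -ENOMEM if the region descriptor
+ * cannot be allocated.
+ */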
static int _pg_dir_add_region(pg_dir_t *pd, void *base, u32_t size,
- u32_t type, u32_t attrs)
+ u32_t type, u32_t attrs, u32_t flags)
{
struct region *reg;
int ret_val;
+ int i;
- ret_val = -ENOMEM;
- reg = kmalloc(sizeof(*reg));
+ ret_val = -EFAULT;
- if(reg) {
- reg->reg_base = base;
- reg->reg_size = size;
- reg->reg_pgsize = PAGE_SIZE;
- reg->reg_type = type;
- reg->reg_attrs = attrs;
+ /* find a usable slot for the region */
+ for(i = 0; i < CONFIG_PAGING_DIR_MAXREGIONS; i++) {
+ if(!pd->pd_regions[i]) {
+ break;
+ }
+ }
- reg->reg_next = pd->pd_regions;
- pd->pd_regions = reg;
+ if(i >= CONFIG_PAGING_DIR_MAXREGIONS) {
+ /* no usable slot found */
+ ret_val = -ERANGE;
+ goto gtfo;
+ }
- ret_val = 0;
+ reg = kmalloc(sizeof(*reg));
+
+ if(!reg) {
+ ret_val = -ENOMEM;
+ goto gtfo;
}
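+
+ /* initialize the descriptor; new regions start with a single reference */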
+ reg->reg_base = base;
+ reg->reg_size = size;
+ reg->reg_pgsize = PAGE_SIZE;
+ reg->reg_type = type;
+ reg->reg_attrs = attrs;
+ reg->reg_flags = flags;
+ reg->reg_refs = 1;
+
+ pd->pd_regions[i] = reg;
+ ret_val = 0;
+
+gtfo:
return(ret_val);
}