]> git.corax.cc Git - corax/commitdiff
kernel/core: Finish execeve() implementation
author Matthias Kruk <m@m10k.eu>
Tue, 5 May 2020 17:23:39 +0000 (02:23 +0900)
committer Matthias Kruk <m@m10k.eu>
Tue, 5 May 2020 17:23:39 +0000 (02:23 +0900)
kernel/core/posixcall.c

index 5186bf87c5d02235c80bfcaea7cf1fbd99f6e970..38ebfffebfd25d4a1750983810b2ac88129c5051 100644 (file)
@@ -5,6 +5,7 @@
 #include <corax/errno.h>
 #include <corax/syscall.h>
 #include <corax/types.h>
+#include <corax/heap.h>
 #include <sys/mman.h>
 #include <arch.h>
 #include <process.h>
@@ -305,7 +306,7 @@ static int _load_elf_section(struct elf_hdr *hdr, struct elf_phdr *phdr, void *d
        int prot;
 
        pd = (pg_dir_t*)data;
-       ret_val = -EINVAL;
+       ret_val = 0;
        prot = 0;
 
        switch(phdr->p_type) {
@@ -362,6 +363,280 @@ static int _load_elf_section(struct elf_hdr *hdr, struct elf_phdr *phdr, void *d
        return(ret_val);
 }
 
+/*
+ * _prepare_user_stack() - build the initial stack image for a new program
+ *
+ * Walks the argument vector u_argv and the environment vector u_envp (both
+ * are addresses that get passed through _pg_dir_vpxlate() together with pd,
+ * and either of them may be NULL) and assembles argc, the argv and envp
+ * pointers, both NULL-terminated pointer arrays, and all strings in a single
+ * buffer obtained from kmalloc().
+ *
+ * Every pointer stored inside the buffer refers to the buffer itself, so the
+ * image has to be rebased before it is placed at its final address.
+ *
+ * On success, the buffer is stored in *dst (it is up to the caller to
+ * kfree() it) and the size that was requested from kmalloc() is returned.
+ * On failure, a negative value is returned (-ENOMEM if the allocation failed,
+ * otherwise the value returned by _pg_dir_vpxlate()) and *dst is not written.
+ */
+static int _prepare_user_stack(pg_dir_t *pd, char **u_argv,
+                              char **u_envp, char **dst)
+{
+       int ret_val;
+       char *k_stackbase;
+       size_t reqsize;
+       size_t argv_size;
+       size_t envp_size;
+       int argc;
+       int envc;
+
+       /*
+        * We're going to arrange the stack something like this:
+        *
+        *              [ "=b\0" ]        --- Example ---
+        *              [ "ENV1" ]        Cmd: /bin/bash arg0 arg1 arg2
+        *           .-----^              Env: ENV0=a ENV1=b
+        *           |  [ "0=a\0"]
+        *           |  [ "\0ENV"]        <--- k_stackbase + OFF_ENV0
+        *         .-+-------^                     - 1 (to be exact)
+        *         | |  [ "arg2" ]
+        *       .-+-+-----^
+        *       | | |  [ "rg1\0"]
+        *       | | |  [ "g0\0a"]
+        *     .-+-+-+---------^
+        *     | | | |  [ "h\0ar"]
+        *   .-+-+-+-+--------^
+        *   | | | | |  [ "/bas" ]
+        * .-+-+-+-+-+->[ "/bin" ]        <--- k_stackbase + OFF_ARG0
+        * | | | | | |  [ 0x0000 ]        <--- k_stackbase + OFF_ENVPZ
+        * | | | | | '- [ 0x0f20 ]
+        * | | | | '--- [ 0x0f18 ] <-.    <--- k_stackbase + OFF_ENVPN(0)
+        * | | | |      [ 0x0000 ]   |    <--- k_stackbase + OFF_ARGVZ
+        * | | | '----- [ 0x0f14 ]   |
+        * | | '------- [ 0x0f0f ]   |
+        * | '--------- [ 0x0f0a ]   |
+        * '----------- [ 0x0f00 ] <-+-.  <--- k_stackbase + OFF_ARGVN(0)
+        *              [  envp  ] --' |  <--- k_stackbase + OFF_ENVP
+        *              [  argv  ] ----'  <--- k_stackbase + OFF_ARGV
+        *              [  argc  ] = 4    <--- k_stackbase + OFF_ARGC
+        *
+        * Which means for the size of the stack that will be allocated:
+        *
+        *      envp_size
+        *    + (envc + 1) * sizeof(char*)
+        *    + argv_size
+        *    + (argc + 1) * sizeof(char*)
+        *    + sizeof(char**)
+        *    + sizeof(char**)
+        *    + sizeof(int)
+        *   ------------------------------
+        *    = stack frame size
+        *
+        */
+
+#define OFF_ARGC     (0)
+#define OFF_ARGV     (sizeof(unsigned) * 1)
+#define OFF_ENVP     (sizeof(unsigned) * 2)
+#define OFF_ARGVN(n) (OFF_ENVP + ((n) + 1) * sizeof(unsigned))
+#define OFF_ARGVZ    (OFF_ARGVN(argc))
+#define OFF_ENVPN(n) (OFF_ARGVZ + ((n) + 1) * sizeof(unsigned))
+#define OFF_ENVPZ    (OFF_ENVPN(envc))
+#define OFF_ARG0     (OFF_ENVPZ + sizeof(unsigned))
+#define OFF_ENV0     (OFF_ARG0 + argv_size)
+
+       /*
+        * ret_val has to start out non-negative: if u_argv and u_envp are
+        * both NULL, nothing below assigns it before the cleanup code reads
+        * it to decide whether the buffer is returned or freed.
+        */
+       ret_val = 0;
+       reqsize = 0;
+       argv_size = 0;
+       envp_size = 0;
+       argc = 0;
+       envc = 0;
+       k_stackbase = NULL;
+
+       /* first pass: count the arguments and add up their string sizes */
+       if(u_argv) {
+               char **argv;
+               char *arg;
+
+               ret_val = _pg_dir_vpxlate(pd, (u32_t)u_argv, (u32_t*)&argv);
+
+               if(ret_val < 0) {
+                       goto cleanup;
+               }
+
+               for(argc = 0; argv[argc]; argc++) {
+                       size_t alen;
+
+                       ret_val = _pg_dir_vpxlate(pd, (u32_t)argv[argc],
+                                                 (u32_t*)&arg);
+
+                       if(ret_val < 0) {
+                               goto cleanup;
+                       }
+
+                       /* account for the terminating NUL as well */
+                       alen = strlen(arg) + 1;
+                       argv_size += alen;
+               }
+       }
+
+       /* first pass, continued: the same for the environment */
+       if(u_envp) {
+               char **envp;
+               char *env;
+
+               ret_val = _pg_dir_vpxlate(pd, (u32_t)u_envp, (u32_t*)&envp);
+
+               if(ret_val < 0) {
+                       goto cleanup;
+               }
+
+               for(envc = 0; envp[envc]; envc++) {
+                       size_t elen;
+
+                       ret_val = _pg_dir_vpxlate(pd, (u32_t)envp[envc],
+                                                 (u32_t*)&env);
+
+                       if(ret_val < 0) {
+                               goto cleanup;
+                       }
+
+                       elen = strlen(env) + 1;
+                       envp_size += elen;
+               }
+       }
+
+       reqsize = envp_size + argv_size +
+               (envc + 1) * sizeof(char*) +
+               (argc + 1) * sizeof(char*) +
+               sizeof(char**) + sizeof(char**) +
+               sizeof(int);
+
+       reqsize = ALIGN(reqsize, 16);
+
+       k_stackbase = kmalloc(reqsize);
+
+       if(!k_stackbase) {
+               ret_val = -ENOMEM;
+               goto cleanup;
+       }
+
+       /* the header: argc, followed by pointers to the two vectors */
+       *(int*)(k_stackbase + OFF_ARGC) = argc;
+       *(char***)(k_stackbase + OFF_ARGV) = (char**)(k_stackbase +
+                                                     OFF_ARGVN(0));
+       *(char***)(k_stackbase + OFF_ENVP) = (char**)(k_stackbase +
+                                                     OFF_ENVPN(0));
+
+       /*
+        * We can skip the error checking after _pg_dir_vpxlate() this time
+        * since we wouldn't be here if errors had occurred back then.
+        *
+        * NOTE(review): this relies on the vectors and strings being
+        * unchanged since the first pass; the strings are measured again
+        * below, so a string that grew in the meantime would be copied past
+        * the space that was reserved for it - confirm that nothing else can
+        * modify this memory while we are in here.
+        */
+       if(u_argv) {
+               char **argv;
+               char *ptr;
+               int i;
+
+               _pg_dir_vpxlate(pd, (u32_t)u_argv, (u32_t*)&argv);
+
+               for(i = 0, ptr = k_stackbase + OFF_ARG0; i < argc; i++) {
+                       char *arg;
+                       int len;
+
+                       _pg_dir_vpxlate(pd, (u32_t)argv[i], (u32_t*)&arg);
+
+                       /* FIXME: We need some sort of pg_dir_strlen() */
+                       len = strlen(arg);
+                       memcpy(ptr, arg, len);
+                       ptr[len] = 0;
+
+                       /* argv[i] points at the copy inside the buffer */
+                       *(char**)(k_stackbase + OFF_ARGVN(i)) = ptr;
+
+                       ptr += len + 1;
+               }
+       }
+
+       *(char**)(k_stackbase + OFF_ARGVZ) = NULL;
+
+       if(u_envp) {
+               char **envp;
+               char *ptr;
+               int i;
+
+               _pg_dir_vpxlate(pd, (u32_t)u_envp, (u32_t*)&envp);
+
+               for(i = 0, ptr = k_stackbase + OFF_ENV0; i < envc; i++) {
+                       char *env;
+                       int len;
+
+                       _pg_dir_vpxlate(pd, (u32_t)envp[i], (u32_t*)&env);
+
+                       /* FIXME: Same as above */
+                       len = strlen(env);
+
+                       memcpy(ptr, env, len);
+                       ptr[len] = 0;
+
+                       /* envp[i] points at the copy inside the buffer */
+                       *(char**)(k_stackbase + OFF_ENVPN(i)) = ptr;
+
+                       ptr += len + 1;
+               }
+       }
+
+       *(char**)(k_stackbase + OFF_ENVPZ) = NULL;
+
+#if DEBUG_EXECEVE
+       {
+               char **args;
+               int i;
+
+               args = *(char***)(k_stackbase + OFF_ARGV);
+
+               dbg_printf("args = %p\n", args);
+               for(i = 0; args[i]; i++) {
+                       dbg_printf("args[%02u] = \"%s\"\n", i, args[i]);
+               }
+       }
+#endif /* DEBUG_EXECEVE */
+
+#undef OFF_ARGC
+#undef OFF_ARGV
+#undef OFF_ENVP
+#undef OFF_ARGVN
+#undef OFF_ARGVZ
+#undef OFF_ENVPN
+#undef OFF_ENVPZ
+#undef OFF_ARG0
+#undef OFF_ENV0
+
+cleanup:
+       if(ret_val < 0) {
+               /* nothing is handed to the caller on failure */
+               if(k_stackbase) {
+                       kfree(k_stackbase);
+               }
+       } else {
+               *dst = k_stackbase;
+               ret_val = (int)reqsize;
+       }
+
+       return(ret_val);
+}
+
+/*
+ * _user_stack_rebase() - relocate the pointers inside an initial stack image
+ *
+ * oldbase is the address where the image currently resides (and where it is
+ * accessed from), newbase is the address that the pointers shall be valid
+ * for. Every non-zero entry of the argv and envp vectors, and then the argv
+ * and envp pointers at the top of the image, are moved by the distance
+ * between the two bases. argc and the strings are left alone.
+ */
+static void _user_stack_rebase(char *oldbase, char *newbase)
+{
+       /*
+        * Everything that gets adjusted here is handled as a plain
+        * register-sized address, even though the image really holds a mix
+        * of char*, char**, and char***. Telling them apart would not change
+        * the arithmetic.
+        */
+       struct {
+               int argc;
+               unsigned long *argv;
+               unsigned long *envp;
+       } *head;
+       unsigned long delta;
+       int n;
+
+       head = (void*)oldbase;
+
+       /* unsigned arithmetic: also correct if newbase is below oldbase */
+       delta = (unsigned long)newbase - (unsigned long)oldbase;
+
+       /*
+        * The vectors have to be walked before the pointers in the header
+        * are touched, since the header is what leads us to them.
+        */
+       n = 0;
+
+       while(head->argv[n]) {
+               head->argv[n] += delta;
+               n++;
+       }
+
+       n = 0;
+
+       while(head->envp[n]) {
+               head->envp[n] += delta;
+               n++;
+       }
+
+       head->argv = (unsigned long*)((unsigned long)head->argv + delta);
+       head->envp = (unsigned long*)((unsigned long)head->envp + delta);
+
+       return;
+}
+
 int sys_execeve(stack_frame_t *stk)
 {
        int ret_val;
@@ -371,27 +646,50 @@ int sys_execeve(stack_frame_t *stk)
        pg_dir_t *old_pd;
        void *old_stack;
        void *new_stack;
-       void *usr_stack;
+       char *usr_stack;
 
-       void *elfdata;
+       void *usr_elfdata;
+       void *krn_elfdata;
        size_t elfsize;
        char **argv;
        char **envp;
+       char *initstack;
+       int initstack_size;
 
-       elfdata = (void*)stk->ebx;
+       usr_elfdata = (void*)stk->ebx;
        elfsize = (size_t)stk->ecx;
        argv = (char**)stk->edx;
        envp = (char**)stk->esi;
        new_pd = NULL;
 
-       ret_val = elf_validate(elfdata, elfsize);
+       cproc = process_get_current();
+       ctask = task_get_current();
+
+        old_stack = ctask->t_kstack;
+       process_get_pagedir(cproc, &old_pd);
+
+       /* FIXME: Validate elfsize */
+       krn_elfdata = kmalloc(elfsize);
+
+       if(!krn_elfdata) {
+               ret_val = -ENOMEM;
+               goto cleanup;
+       }
+
+       process_memcpy_ptok(cproc, krn_elfdata, usr_elfdata, elfsize);
+
+       ret_val = elf_validate(krn_elfdata, elfsize);
 
        if(ret_val < 0) {
                goto cleanup;
        }
 
-       cproc = process_get_current();
-       ctask = task_get_current();
+       initstack_size = _prepare_user_stack(old_pd, argv, envp, &initstack);
+
+       if(initstack_size < 0) {
+               ret_val = initstack_size;
+               goto cleanup;
+       }
 
        ret_val = pg_dir_create(&new_pd);
 
@@ -399,13 +697,13 @@ int sys_execeve(stack_frame_t *stk)
                goto cleanup;
        }
 
-       ret_val = elf_phdr_foreach(elfdata, elfsize, _load_elf_section, new_pd);
+       ret_val = elf_phdr_foreach(krn_elfdata, elfsize,
+                                  _load_elf_section, new_pd);
 
        if(ret_val < 0) {
                goto cleanup;
        }
 
-       process_get_pagedir(cproc, &old_pd);
        process_set_pagedir(cproc, new_pd);
 
        /*
@@ -419,20 +717,33 @@ int sys_execeve(stack_frame_t *stk)
         * page directory that we're about to free. We have to copy the old
         * stack to the new one and update ESP, EBP.
         */
-       old_stack = ctask->t_kstack;
        new_stack = pg_dir_get_kstack(new_pd);
 
        /* FIXME: Stack size may not be static */
-       usr_stack = (void*)((u32_t)pg_dir_get_ustack(new_pd) +
-                        CONFIG_USER_STACK_SIZE);
-
-       /* prepare the stack frame for the function */
-       usr_stack = setstackvfe(usr_stack, argv, envp);
-
-       /* make the task execute the new function upon return from the interrupt */
-       stk->eip = (u32_t)elf_entry(elfdata);
-       stk->ebp = (u32_t)usr_stack;
-       stk->esp = (u32_t)usr_stack;
+       usr_stack = (char*)pg_dir_get_ustack(new_pd) +
+               CONFIG_USER_STACK_SIZE - initstack_size;
+
+       /* adjust argv and envp */
+       _user_stack_rebase(initstack, usr_stack);
+
+#if DEBUG_EXECEVE
+       dbg_printf("KSTK in [ 0x%08x, 0x%08x ]\n", initstack,
+                  initstack + initstack_size);
+       dbg_printf("USTK in [ 0x%08x, 0x%08x ]\n", usr_stack,
+                  usr_stack + initstack_size);
+       dbg_printf("STK: 0x%08x\n", *((u32_t*)initstack + 3));
+       dbg_printf("STK: 0x%08x\n", *((u32_t*)initstack + 2));
+       dbg_printf("STK: 0x%08x\n", *((u32_t*)initstack + 1));
+       dbg_printf("STK: 0x%08x\n", *(u32_t*)initstack);
+#endif /* DEBUG_EXECEVE */
+
+       /* copy the initial stack into the process address space */
+       process_memcpy_ktop(cproc, usr_stack, initstack, initstack_size);
+
+       /* make the task jump to the ELF's entry upon return from the syscall */
+       stk->eip = (u32_t)elf_entry(krn_elfdata);
+       stk->ebp = 0;
+       stk->esp = 0; /* will be ignored */
        stk->prevesp = (u32_t)usr_stack;
 
        /*
@@ -455,6 +766,14 @@ cleanup:
                }
        }
 
+       if(initstack) {
+               kfree(initstack);
+       }
+
+       if(krn_elfdata) {
+               kfree(krn_elfdata);
+       }
+
        return(ret_val);
 }