From c5ce170469c715f41edccdef3fbc36a3be401efd Mon Sep 17 00:00:00 2001 From: Matthias Kruk Date: Sat, 8 Feb 2020 23:26:08 +0900 Subject: [PATCH] klibc/mutex: Avoid priority inversion in the mutex implementation Wake up all tasks waiting on a mutex when it gets unlocked. This way, the order in which the waiting tasks will acquire the mutex depends solely on the scheduler (and the tasks' priority), not implicitly on the mutex. --- kernel/include/mutex.h | 17 +++--- kernel/klibc/mutex.c | 117 +++++++++++++++++++++++++++-------------- 2 files changed, 86 insertions(+), 48 deletions(-) diff --git a/kernel/include/mutex.h b/kernel/include/mutex.h index e77b2f4..40485ff 100644 --- a/kernel/include/mutex.h +++ b/kernel/include/mutex.h @@ -11,15 +11,16 @@ typedef struct _mutex mutex_t; * * DESCRIPTION * The mutex_lock() function locks the mutex pointed to by `mutex'. - * If the mutex cannot be immediately acquired, the calling thread - * will be suspended and added to the mutex's wait queue, and it will - * be resumed when it has arrived at the top of the queue and the - * mutex is unlocked by another thread. + * If the mutex cannot be immediately acquired, the calling task will + * be suspended and added to the mutex's wait list. When the mutex + * gets unlocked, all tasks on the wait list will be woken up and the + * task that gets to acquire the mutex next will be determined by the + * scheduler. This behavior is necessary to avoid priority inversion. + * This function will not return until the task has acquired the + * mutex, meaning that it might be suspended indefinitely. * * RETURN VALUE * 0 the mutex was successfully acquired - * -EDEADLK acquiring the mutex would have caused a deadlock - * -ENOMEM the kernel ran out of memory while enqueueing the thread */ int mutex_lock(mutex_t*); @@ -49,8 +50,8 @@ int mutex_trylock(mutex_t*); * DESCRIPTION * The mutex_unlock() function will attempt to unlock the mutex * pointed to by `mutex'. 
If the calling thread is the owner of - * the mutex, the mutex will be unlocked and the thread that is - * waiting at the top of the mutex's wait queue will be awoken. + * the mutex, the mutex will be unlocked and all tasks that are + * on the mutex's wait list will be woken up. * * RETURN VALUE * 0 the mutex was successfully unlocked diff --git a/kernel/klibc/mutex.c b/kernel/klibc/mutex.c index 9422b89..f76693c 100644 --- a/kernel/klibc/mutex.c +++ b/kernel/klibc/mutex.c @@ -14,69 +14,95 @@ struct _mutex { task_t *mx_waitq[CONFIG_MUTEX_WAITQLEN]; }; +inline static int _nq(mutex_t *mutex, task_t *t) +{ + int i; + + for(i = 0; i < CONFIG_MUTEX_WAITQLEN; i++) { + if(!mutex->mx_waitq[i]) { + mutex->mx_waitq[i] = t; + return(i); + } + } + + return(-ENOMEM); +} + +inline static void _wakeup_all(mutex_t *mutex) +{ + int i; + + for(i = 0; i < CONFIG_MUTEX_WAITQLEN; i++) { + if(mutex->mx_waitq[i]) { + sched_wakeup(mutex->mx_waitq[i], 0); + } + } + + return; +} + int mutex_lock(mutex_t *mutex) { task_t *ctask; int ret_val; + int slot; - ret_val = EAGAIN; ctask = task_get_current(); + ret_val = -EAGAIN; + slot = -1; - while(ret_val == EAGAIN) { - int i; - - /* lock the mutex */ + do { spinlock_lock(&(mutex->mx_lock)); if(!mutex->mx_owner) { - /* mutex is not currently held by anyone - claim it */ + /* the mutex is unclaimed - claim it */ mutex->mx_owner = ctask; /* - * Move the waitq up by one item, removing the first one (which should - * have been the task that is currently executing this function) + * We had been in the queue if slot isn't negative, + * so we also need to remove ourselves from the queue. 
*/ - for(i = 0; i < (CONFIG_MUTEX_WAITQLEN - 1) && mutex->mx_waitq[i + 1]; i++) { - mutex->mx_waitq[i] = mutex->mx_waitq[i + 1]; + if(slot >= 0) { + mutex->mx_waitq[slot] = NULL; } - mutex->mx_waitq[CONFIG_MUTEX_WAITQLEN - 1] = NULL; ret_val = 0; } else { - /* add the caller to the wait queue */ - - for(i = 0; i < CONFIG_MUTEX_WAITQLEN; i++) { - if(!mutex->mx_waitq[i]) { - mutex->mx_waitq[i] = ctask; - break; - } - } + /* mutex is currently claimed by someone else */ - if(i < CONFIG_MUTEX_WAITQLEN) { - /* successfully enqueued */ - ret_val = EAGAIN; - } else { - /* the queue was full */ - ret_val = -ENOMEM; + if(slot < 0) { + /* get a slot in the wait queue */ + slot = _nq(mutex, ctask); } } - /* unlock the mutex */ spinlock_unlock(&(mutex->mx_lock)); /* - * At this point, ret_val will have one of the values { -ENOMEM, 0, EAGAIN }. - * In case of -ENOMEM and 0, the loop will end and the value is returned to - * the caller. In the case of EAGAIN, the task will be suspended since it was - * successfully added to the mutex's wait queue. EAGAIN is deliberately chosen - * not to be a negative value since it is not an error that is suppposed to be - * returned to the caller. + * FIXME: Dynamically adjust the length of the wait queue + * + * It is possible that we could not get a slot in the wait + * queue. In this case, the value of `slot' will be -ENOMEM + * and this task will, instead of going to sleep, continue + * execution of this function until it either gets into the + * queue or acquires the mutex. This is not good because it + * will either lead to excessive resource consumption (in the + * good case), or to priority inversion because the task that + * is busy-waiting on the mutex will potentially acquire a + * recently-unlocked mutex before any of the tasks in the + * wait queue were scheduled for execution. */ - if(ret_val == EAGAIN) { - /* suspend this task */ - sched_task_suspend(NULL); + + if(slot >= 0) { + /* + * We have a place in the queue. 
Go to bed and wait + * to get woken up by the scheduler once the current + * owner has unlocked the mutex. + */ + sched_sleep(NULL); } - } + + } while(ret_val < 0); return(ret_val); } @@ -115,10 +141,21 @@ int mutex_unlock(mutex_t *mutex) /* caller currently owns the mutex - unlock it */ mutex->mx_owner = NULL; - /* if there is a task in the waitq, resume it */ - if(mutex->mx_waitq[0]) { - sched_task_resume(mutex->mx_waitq[0]); - } + /* wake up all tasks waiting on the mutex */ + _wakeup_all(mutex); + + /* + * We could also just wake up the first task in the queue, + * but this might lead to priority inversion. The second task + * might actually have a much higher priority than the first + * one, but the mutex has no notion of task priorities. By + * waking up all tasks that are in the queue, we leave it to + * the scheduler to decide which task will get to the mutex + * first. This is slightly more wasteful because all but one + * task will attempt to lock the mutex in vain, but deciding + * the correct order of execution is up to the scheduler, not + * to a mutex. + */ ret_val = 0; } else if(!mutex->mx_owner) { -- 2.47.3