Tuesday 5 April 2016

Linux internals for waitqueue/wakeup functions

Wait queues and wakeup in linux kernel  :

I am using kernel version 3.19 to illustrate the waitqueue functions:

First there is waitqueue head and the waitqueue.

The waitqueue structure is  :
 20 struct __wait_queue {
 21         unsigned int            flags;
 22         void                    *private;
 23         wait_queue_func_t       func;
 24         struct list_head        task_list;
 25 };
 12 typedef struct __wait_queue wait_queue_t;

So a wait queue entry carries its flags, a private pointer (normally the waiting task), a wakeup callback function, and a task_list node used to link it into the queue.

The wait queue head structure is as follows :

 39 struct __wait_queue_head {
 40         spinlock_t              lock;
 41         struct list_head        task_list;
 42 };
 43 typedef struct __wait_queue_head wait_queue_head_t;

The wait queue head embeds a spinlock along with the task list.

To build a linked list of waiting tasks, we first declare and define the wait queue head.

 63 #define DECLARE_WAIT_QUEUE_HEAD(name) \
 64         wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)

 59 #define __WAIT_QUEUE_HEAD_INITIALIZER(name) {                           \
 60         .lock           = __SPIN_LOCK_UNLOCKED(name.lock),              \
 61         .task_list      = { &(name).task_list, &(name).task_list } }

These steps initialize the spinlock and the task list.

Initializing the wait_queue head is done like :

 72 extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *);
 73
 74 #define init_waitqueue_head(q)                          \
 75         do {                                            \
 76                 static struct lock_class_key __key;     \
 77                                                         \
 78                 __init_waitqueue_head((q), #q, &__key); \
 79         } while (0)

 kernel/sched/wait.c
 14 void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
 15 {
 16         spin_lock_init(&q->lock);
 17         lockdep_set_class_and_name(&q->lock, key, name);
 18         INIT_LIST_HEAD(&q->task_list);
 19 }


Now we need to initialize the wait queue.

 51 #define __WAITQUEUE_INITIALIZER(name, tsk) {                            \
 52         .private        = tsk,                                          \
 53         .func           = default_wake_function,                        \
 54         .task_list      = { NULL, NULL } }
 55
 56 #define DECLARE_WAITQUEUE(name, tsk)                                    \
 57         wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)

This is the function to initialise a wait_queue entry :

 90 static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
 91 {
 92         q->flags        = 0;
 93         q->private      = p;
 94         q->func         = default_wake_function;
 95 }
 96

 Now, to add a wait queue entry to the wait queue head's list, we use the function add_wait_queue :
 23 void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
 24 {
 25         unsigned long flags;
 26
 27         wait->flags &= ~WQ_FLAG_EXCLUSIVE;
 28         spin_lock_irqsave(&q->lock, flags);
 29         __add_wait_queue(q, wait);
 30         spin_unlock_irqrestore(&q->lock, flags);
 31 }
 32 EXPORT_SYMBOL(add_wait_queue);


 114 static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
115 {
116         list_add(&new->task_list, &head->task_list);
117 }

 Let's take a look at default_wake_function :
 kernel/sched/core.c
2991 int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
2992                           void *key)
2993 {
2994         return try_to_wake_up(curr->private, mode, wake_flags);
2995 }
2996 EXPORT_SYMBOL(default_wake_function);


1673 /**
1674  * try_to_wake_up - wake up a thread
1675  * @p: the thread to be awakened
1676  * @state: the mask of task states that can be woken
1677  * @wake_flags: wake modifier flags (WF_*)
1678  *
1679  * Put it on the run-queue if it's not already there. The "current"
1680  * thread is always on the run-queue (except when the actual
1681  * re-schedule is in progress), and as such you're allowed to do
1682  * the simpler "current->state = TASK_RUNNING" to mark yourself
1683  * runnable without the overhead of this.
1684  *
1685  * Return: %true if @p was woken up, %false if it was already running.
1686  * or @state didn't match @p's state.
1687  */
1688 static int
1689 try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
1690 {
1691         unsigned long flags;
1692         int cpu, success = 0;
1693
1694         /*
1695          * If we are going to wake up a thread waiting for CONDITION we
1696          * need to ensure that CONDITION=1 done by the caller can not be
1697          * reordered with p->state check below. This pairs with mb() in
1698          * set_current_state() the waiting thread does.
1699          */
1700         smp_mb__before_spinlock();
1701         raw_spin_lock_irqsave(&p->pi_lock, flags);
1702         if (!(p->state & state))
1703                 goto out;
1704
1705         success = 1; /* we're going to change ->state */
1706         cpu = task_cpu(p);
1707
1708         if (p->on_rq && ttwu_remote(p, wake_flags))
1709                 goto stat;
1710
1711 #ifdef CONFIG_SMP
1712         /*
1713          * If the owning (remote) cpu is still in the middle of schedule() with
1714          * this task as prev, wait until its done referencing the task.
1715          */
1716         while (p->on_cpu)
1717                 cpu_relax();
1718         /*
1719          * Pairs with the smp_wmb() in finish_lock_switch().
1720          */
1721         smp_rmb();
1722
1723         p->sched_contributes_to_load = !!task_contributes_to_load(p);
1724         p->state = TASK_WAKING;
1725
1726         if (p->sched_class->task_waking)
1727                 p->sched_class->task_waking(p);
1728
1729         cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
1730         if (task_cpu(p) != cpu) {
1731                 wake_flags |= WF_MIGRATED;
1732                 set_task_cpu(p, cpu);
1733         }
1734 #endif /* CONFIG_SMP */
1735
1736         ttwu_queue(p, cpu);
1737 stat:
1738         ttwu_stat(p, cpu, wake_flags);
1739 out:
1740         raw_spin_unlock_irqrestore(&p->pi_lock, flags);
1741
1742         return success;
1743 }


So how do try_to_wake_up and default_wake_function actually get called ?
We need to wake up the tasks placed on the wait queue, and the wake_up family of functions does exactly that.

include/linux/wait.h
#define wake_up(x) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, NULL)
#define wake_up_nr(x, nr) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr, NULL)
#define wake_up_all(x) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0, NULL)
#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
#define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
#define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)

 89 void __wake_up(wait_queue_head_t *q, unsigned int mode,
 90                         int nr_exclusive, void *key)
 91 {
 92         unsigned long flags;
 93
 94         spin_lock_irqsave(&q->lock, flags);
 95         __wake_up_common(q, mode, nr_exclusive, 0, key);
 96         spin_unlock_irqrestore(&q->lock, flags);
 97 }
 98 EXPORT_SYMBOL(__wake_up);


 65 static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 66                         int nr_exclusive, int wake_flags, void *key)
 67 {
 68         wait_queue_t *curr, *next;
 69
 70         list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
 71                 unsigned flags = curr->flags;
 72
 73                 if (curr->func(curr, mode, wake_flags, key) &&         <------default_wake_function called
 74                                 (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
 75                         break;
 76         }
 77 }


Other functions for wait/wakeup 
#define wait_event_interruptible(wq, condition)
#define wait_event_timeout(wq, condition, timeout) { ... }
#define wait_event_interruptible_timeout(wq, condition, timeout)


390 #define wait_event_interruptible(wq, condition)                         \
391 ({                                                                      \
392         int __ret = 0;                                                  \
393         might_sleep();                                                  \
394         if (!(condition))                                               \
395                 __ret = __wait_event_interruptible(wq, condition);      \
396         __ret;                                                          \
397 })


371 #define __wait_event_interruptible(wq, condition)                       \
372         ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,          \
373                       schedule())
374

 212 #define ___wait_event(wq, condition, state, exclusive, ret, cmd)        \
213 ({                                                                      \
214         __label__ __out;                                                \
215         wait_queue_t __wait;                                            \
216         long __ret = ret;       /* explicit shadow */                   \
217                                                                         \
218         INIT_LIST_HEAD(&__wait.task_list);                              \
219         if (exclusive)                                                  \
220                 __wait.flags = WQ_FLAG_EXCLUSIVE;                       \
221         else                                                            \
222                 __wait.flags = 0;                                       \
223                                                                         \
224         for (;;) {                                                      \
225                 long __int = prepare_to_wait_event(&wq, &__wait, state);\
226                                                                         \
227                 if (condition)                                          \
228                         break;                                          \
229                                                                         \
230                 if (___wait_is_interruptible(state) && __int) {         \
231                         __ret = __int;                                  \
232                         if (exclusive) {                                \
233                                 abort_exclusive_wait(&wq, &__wait,      \
234                                                      state, NULL);      \
235                                 goto __out;                             \
236                         }                                               \
237                         break;                                          \
238                 }                                                       \
239                                                                         \
240                 cmd;                                                    \
241         }                                                               \
242         finish_wait(&wq, &__wait);                                      \
243 __out:  __ret;                                                          \
244 })
245


199 long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
200 {
201         unsigned long flags;
202
203         if (signal_pending_state(state, current))
204                 return -ERESTARTSYS;
205
206         wait->private = current;
207         wait->func = autoremove_wake_function;
208
209         spin_lock_irqsave(&q->lock, flags);
210         if (list_empty(&wait->task_list)) {
211                 if (wait->flags & WQ_FLAG_EXCLUSIVE)
212                         __add_wait_queue_tail(q, wait);
213                 else
214                         __add_wait_queue(q, wait);
215         }
216         set_current_state(state);
217         spin_unlock_irqrestore(&q->lock, flags);
218
219         return 0;
220 }
221 EXPORT_SYMBOL(prepare_to_wait_event);

232 void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
233 {
234         unsigned long flags;
235
236         __set_current_state(TASK_RUNNING);
237         /*
238          * We can check for list emptiness outside the lock
239          * IFF:
240          *  - we use the "careful" check that verifies both
241          *    the next and prev pointers, so that there cannot
242          *    be any half-pending updates in progress on other
243          *    CPU's that we haven't seen yet (and that might
244          *    still change the stack area.
245          * and
246          *  - all other users take the lock (ie we can only
247          *    have _one_ other CPU that looks at or modifies
248          *    the list).
249          */
250         if (!list_empty_careful(&wait->task_list)) {
251                 spin_lock_irqsave(&q->lock, flags);
252                 list_del_init(&wait->task_list);
253                 spin_unlock_irqrestore(&q->lock, flags);
254         }
255 }
256 EXPORT_SYMBOL(finish_wait);

No comments:

Post a Comment