内核代码阅读(26) - sleep和定时器

2023-11-10 19:23:10

sleep和pause

/*
 * sys_nanosleep - system-call entry that suspends the caller for the
 * interval described by the user-space timespec at rqtp.
 *
 * rqtp: user pointer to the requested interval (read with copy_from_user).
 * rmtp: optional user pointer; if non-NULL and the sleep ends with ticks
 *       left over, the remaining interval is written to it.
 *
 * Returns 0 when nothing is left of the interval, -EFAULT when a user copy
 * fails, -EINVAL for an out-of-range timespec, and -EINTR when
 * schedule_timeout() reports remaining ticks.
 */
asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
    {
        struct timespec t;
        unsigned long expire;
        if(copy_from_user(&t, rqtp, sizeof(struct timespec)))
                return -EFAULT;
        /* Reject a nanosecond field outside [0, 1e9) and negative seconds. */
        if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
                return -EINVAL;
        /*
         * Requests of at most 2,000,000 ns from a task whose policy is not
         * SCHED_OTHER are served by udelay() instead of sleeping; the
         * nanoseconds are rounded up to whole microseconds.
         */
        if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
            current->policy != SCHED_OTHER)
        {
                udelay((t.tv_nsec + 999) / 1000);
                return 0;
        }
        /* Convert to ticks and add one extra tick for any non-zero request. */
        expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
        /* The task state is set here; schedule_timeout() then gives up the CPU. */
        current->state = TASK_INTERRUPTIBLE;
        expire = schedule_timeout(expire);
        /* Non-zero means ticks were still left when the task resumed. */
        if (expire) {
                if (rmtp) {
                        jiffies_to_timespec(expire, &t);
                        if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
                                return -EFAULT;
                }
                return -EINTR;
        }
        return 0;
    }

1) 参数(struct timespec *rqtp, struct timespec *rmtp)
   rqtp是输入，即要睡眠的时间
   rmtp是输出，即还有多少时间没有睡眠。
2) if (t.tv_sec == 0 && t.tv_nsec <= 2000000L && current->policy != SCHED_OTHER)
   由于时钟中断的频率是100HZ，也就是时钟的精确度只有10ms。如果要求睡眠的时间不超过2毫秒，并且当前进程是实时进程（调度策略不是SCHED_OTHER），就不能让这个进程真正进入睡眠，因为那样可能要到10毫秒之后才会醒来。
   这里是通过udelay忙等待，空转消耗掉所请求时间对应的指令数。然后就完成了睡眠，返回到用户空间了。
3) expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
   把要睡眠的时间转换成时钟中断的次数。
4) current->state = TASK_INTERRUPTIBLE;
   更改状态。
5) expire = schedule_timeout(expire);
   显式触发一次调度。
6) if (expire)
   expire非0说明进程在睡眠时间用完之前就被唤醒了（例如收到了信号），expire是剩余的时钟中断数。如果rmtp非空，则把它转换成时间拷贝到用户空间，并返回-EINTR；expire为0才表示完整地睡完了，返回0。

udelay 消耗时间对应的指令个数

/*
 * udelay(n) - delay macro; sys_nanosleep calls it with a microsecond count.
 *
 * When n is a compile-time constant: a value above 20000 expands to
 * __bad_udelay(), anything else to __const_udelay((n) * 0x10c6ul).
 * A non-constant n is passed to __udelay(n).
 *
 * NOTE(review): 0x10c6 is 4294, i.e. 2^32 / 1000000 truncated - presumably a
 * fixed-point scale factor; __bad_udelay() is presumably a deliberately
 * undefined symbol so oversized constants fail at link time. Confirm both
 * against the definitions, which are not part of this excerpt.
 */
#define udelay(n) (__builtin_constant_p(n) ? \
        ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c6ul)) : \
        __udelay(n))

/*
 * __loop_delay - burn CPU time in a counted busy loop.
 *
 * loops: initial counter value. It is loaded into the "a" register (the "0"
 *        input constraint ties it to output operand 0) and decremented by
 *        decl until the result becomes negative (jns stops looping once the
 *        sign flag is set).
 *
 * The two jmp/.align 16 pairs place the loop body at label 2 on a 16-byte
 * boundary. d0 only exists to give the asm an output operand for the
 * clobbered register; its value is never read.
 */
static void __loop_delay(unsigned long loops)
    {
        int d0;
        __asm__ __volatile__(
                "\tjmp 1f\n"
                ".align 16\n"
                "1:\tjmp 2f\n"
                ".align 16\n"
                "2:\tdecl %0\n\tjns 2b"
                :"=&a" (d0)
                :"0" (loops));
    }

对于短时间的delay，通过空转CPU实现。
有些外设要求两次外设操作之间要间隔一定的时间。

schedule_timeout

/*
 * schedule_timeout - give up the CPU until `timeout` ticks have passed or
 * the task is woken earlier.
 *
 * timeout: number of jiffies to wait. MAX_SCHEDULE_TIMEOUT means "no timer":
 *          schedule() is called without arming one. A negative value is
 *          reported with printk and treated as an error.
 *
 * Returns the number of jiffies still remaining when the task resumed,
 * clamped to 0 (so 0 on expiry and on the negative-timeout error path).
 * For MAX_SCHEDULE_TIMEOUT the value is returned unchanged.
 *
 * This function does not set the sleeping state itself; the caller does
 * (sys_nanosleep sets TASK_INTERRUPTIBLE before calling it).
 */
signed long schedule_timeout(signed long timeout)
    {
        struct timer_list timer;
        unsigned long expire;
        switch (timeout)
        {
        case MAX_SCHEDULE_TIMEOUT:
                schedule();
                goto out;
        default:
                if (timeout < 0)
                {
                        printk(KERN_ERR "schedule_timeout: wrong timeout "
                               "value %lx from %p\n", timeout,
                               __builtin_return_address(0));
                        /* Undo the caller's state change; no sleep happens. */
                        current->state = TASK_RUNNING;
                        goto out;
                }
        }
        /* Absolute expiry time in jiffies. */
        expire = timeout + jiffies;
        /* Arm a timer whose callback receives the current task as its data. */
        init_timer(&timer);
        timer.expires = expire;
        timer.data = (unsigned long) current;
        timer.function = process_timeout;
        add_timer(&timer);
        schedule();
        /*
         * Execution resumes here after the task has been woken. The timer is a
         * local variable, so it is removed before this frame goes away.
         */
        del_timer_sync(&timer);
        /* Ticks left; negative if the task resumed after the expiry time. */
        timeout = expire - jiffies;
    out:
        return timeout < 0 ? 0 : timeout;
    }

1) add_timer(&timer);
   插入定时器。插入时要保证每次时钟中断到来后，都能够快速地找到应该处理的定时器，也就是到期时间等于当前jiffies的那些定时器。
2) schedule();
   主动触发一次调度，当前进程在内核态就在schedule里面切换到了另外一个进程了。
   即，当前进程在执行到schedule被切换走了，等到被唤醒才能接着执行。
3) timeout = expire - jiffies;
   执行到这条指令，说明进程已经被唤醒了。计算出剩余的时间并且返回。
   实际上这个进程已经经历了从运行到被调度出去，又回到运行的过程。
4) del_timer_sync(&timer);
   进程被唤醒后把timer从全局的定时器桶中删除。
   定时器到期时，在时钟中断的处理中已经被摘除过一次，这个地方为什么还要再删除呢？原因是进程可能收到了信号，提前醒来了，此时定时器还没有到期，没有机会在时钟中断的驱动下被摘除。

添加一个定时器到内核的定时器队列中 - 定时器的数据结构和算法

/*
 * internal_add_timer - link a timer into the bucket that matches how far in
 * the future it expires.
 *
 * timer: timer to queue; only timer->expires and timer->list are used here.
 *
 * idx is the distance from timer_jiffies to the expiry time. Successively
 * larger ranges of idx select tv1..tv5, and within the chosen vector the
 * bucket index is the corresponding bit field of `expires`.
 */
static inline void internal_add_timer(struct timer_list *timer)
    {
        unsigned long expires = timer->expires;
        unsigned long idx = expires - timer_jiffies;
        struct list_head * vec;
        if (idx < TVR_SIZE) {
                /* Nearest range: bucket chosen by the low TVR_BITS bits. */
                int i = expires & TVR_MASK;
                vec = tv1.vec + i;
        } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
                int i = (expires >> TVR_BITS) & TVN_MASK;
                vec = tv2.vec + i;
        } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
                int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
                vec =  tv3.vec + i;
        } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
                int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
                vec = tv4.vec + i;
        } else if ((signed long) idx < 0) {
                /*
                 * expires lies before timer_jiffies (the unsigned difference
                 * wrapped): queue on the bucket tv1.index currently points at.
                 */
                vec = tv1.vec + tv1.index;
        } else if (idx <= 0xffffffffUL) {
                int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
                vec = tv5.vec + i;
        } else {
                /* Distance exceeds 32 bits: leave the timer unqueued. */
                INIT_LIST_HEAD(&timer->list);
                return;
        }
        /*
         * Insert after the bucket's last entry, i.e. at the tail
         * (assuming the usual kernel list_add semantics - TODO confirm).
         */
        list_add(&timer->list, vec->prev);
    }

首先在内核中把要到点的时间转换成jiffies来处理。
如何来维护定时器，保证在时间点到了后快速的找到要处理的定时器呢？
内核将32位的expire分解成了 6 + 6 + 6 + 6 + 8 位，一共用5个hash表（tv5、tv4、tv3、tv2、tv1）来维护。
tv1里是最早到期的定时器，tv2次之。每次时钟中断来了之后，都从tv1中取定时器，并把tv1的index往下移动一位。如果index移动了256次，则从tv2中取一批定时器；如果tv2的index也走完了64次，则从更高级别的tv3中取一批。
这个过程很像进位制，低位的定时器消耗完了，就从高位取一批定时器，把这批定时器分散到低位的槽位里。

/*
 * Timer wheel storage.
 *
 * Each vector holds one list head per bucket plus `index`, the bucket the
 * wheel currently points at. Bucket indices are computed elsewhere by
 * masking with TVN_MASK / TVR_MASK, so the arrays must hold one entry per
 * possible masked value: 64 (6 bits) for tv2..tv5 and 256 (8 bits) for tv1.
 * Sizing them by the bit count (6 / 8) would make almost every computed
 * index fall outside the array.
 */
struct timer_vec {
        int index;
        struct list_head vec[64];       /* 1 << 6 buckets */
    };
    struct timer_vec_root {
        int index;
        struct list_head vec[256];      /* 1 << 8 buckets */
    };
    static struct timer_vec tv5;
    static struct timer_vec tv4;
    static struct timer_vec tv3;
    static struct timer_vec tv2;
    static struct timer_vec_root tv1;
    /*
     * All five vectors ordered from nearest (tv1) to farthest (tv5). tv1 has
     * a different struct type and is cast so the table is uniform.
     */
    static struct timer_vec * const tvecs[] = {
        (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
    };

内核定时器队列的消费 - 时钟中断对定时器的处理

/*
 * timer_bh - timer bottom-half handler: calls update_times() and then
 * run_timer_list(), which runs the timers that are due.
 */
void timer_bh(void)
    {
        update_times();
        run_timer_list();
   }

时钟中断的bottom half负责处理定时器列表。

run_timer_list

这个函数是定时器的全部逻辑，共做了两件事情：
 1) 每当低位的定时器桶走完了256格，或者64格，就从高位‘借位’。
 2) 对指针tv1.index所指向的定时器桶，遍历处理。

/*
 * run_timer_list - run every timer that is due, advancing timer_jiffies one
 * tick at a time until it has passed jiffies.
 *
 * For each tick: when tv1.index is 0, timers are cascaded down from the
 * higher vectors; then every timer in the bucket tv1.index points at is
 * detached and its callback invoked; finally timer_jiffies and tv1.index
 * move on by one.
 *
 * timerlist_lock is held throughout, except while a callback runs.
 */
static inline void run_timer_list(void)
    {
        spin_lock_irq(&timerlist_lock);
        /* Signed difference keeps the comparison valid across jiffies wrap. */
        while ((long)(jiffies - timer_jiffies) >= 0) {
                struct list_head *head, *curr;
                if (!tv1.index) {
                        /*
                         * Start at tv2 and move to the next vector while the
                         * one just cascaded has index == 1.
                         * NOTE(review): presumably cascade_timers() advances
                         * that index, so 1 means it has just wrapped - confirm.
                         */
                        int n = 1;
                        do {
                                cascade_timers(tvecs[n]);
                        } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
                }
    repeat:
                head = tv1.vec + tv1.index;
                curr = head->next;
                /* Non-empty bucket: take the first timer off the list. */
                if (curr != head) {
                        struct timer_list *timer;
                        void (*fn)(unsigned long);
                        unsigned long data;
                        timer = list_entry(curr, struct timer_list, list);
                        /* Copy callback and argument before the lock is dropped. */
                         fn = timer->function;
                         data= timer->data;
                        detach_timer(timer);
                        timer->list.next = timer->list.prev = NULL;
                        /* NOTE(review): timer_enter/timer_exit presumably mark the running timer - confirm. */
                        timer_enter(timer);
                        /* The callback runs without timerlist_lock held. */
                        spin_unlock_irq(&timerlist_lock);
                        fn(data);
                        spin_lock_irq(&timerlist_lock);
                        timer_exit();
                        /* Re-read the bucket head; it may have changed meanwhile. */
                        goto repeat;
                }
                ++timer_jiffies; 
                tv1.index = (tv1.index + 1) & TVR_MASK;
        }
        spin_unlock_irq(&timerlist_lock);
    }

1) if (!tv1.index) {
            int n = 1;
            do {
                    cascade_timers(tvecs[n]);
            } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
    }
   每当tv1.index走到0的时候，就从更高位的桶里获取定时器。
2) head = tv1.vec + tv1.index;
   处理指针tv1.index所指向的定时器桶。
3) if (curr != head)
   如果tv1.index所指向的定时器列表非空，则处理这个桶上的所有定时器。

定时器对睡眠进程的唤醒process_timeout

在定时器到时后，会调用schedule_timeout设置的回调函数process_timeout

/*
 * process_timeout - timer callback installed by schedule_timeout().
 *
 * __data: the sleeping task's task_struct pointer, stored as an unsigned
 *         long (schedule_timeout puts `current` into timer.data).
 *
 * Wakes that task with wake_up_process().
 */
static void process_timeout(unsigned long __data)
    {
        struct task_struct * p = (struct task_struct *) __data;
        wake_up_process(p);
    }

至此，实现了进程的到时唤醒。把目标进程加入了runqueue中。

码农公寓

sleep和pause

udelay 消耗时间对应的指令个数

schedule_timeout

添加一个定时器到内核的定时器队列中 - 定时器的数据结构和算法

内核定时器队列的消费 - 时钟中断对定时器的处理

run_timer_list

定时器对睡眠进程的唤醒process_timeout

相关文章