内核代码阅读(18) - 时钟中断的上半部do_timer和下半部timer_bh

凌晨1点正是看代码的好时间,自勉。

内核代码阅读(17) 中分析了如何借助softirq机制实现Top Half和Bottom Half。时钟中断不仅是内核的心跳,而且很好地诠释了softirq机制是如何把时钟中断分成上下两个部分的。

时钟中断机制

时钟中断很重要,是内核的心跳。

进入内核态的途径:中断发生,异常,系统调用。

进程的调度只能在内核态运行,如果用户态的进程进入了死循环,而此时一直都没有中断,异常,系统调用,CPU岂不是一直进入不了内核执行?

时钟中断就是惟一可以预测的中断源。

时钟中断的初始化

/*
 * Abridged excerpt of start_kernel(): only the initialisation calls around
 * time_init() are shown.  time_init() is the call that sets up the timer
 * interrupt.
 */
asmlinkage void __init start_kernel(void)
    {
        char * command_line;
        unsigned long mempages;   /* not referenced in this excerpt */
        extern char saved_command_line[];
        lock_kernel();
        printk(linux_banner);
        setup_arch(&command_line);
        printk("Kernel command line: %s\n", saved_command_line);
        parse_options(command_line);
        trap_init();
        init_IRQ();
        sched_init();     /* per the article, this is where init_bh(TIMER_BH, timer_bh) happens */
        time_init();      /* timer interrupt initialisation */
        softirq_init();
    }
1) time_init 就是时钟中断初始化函数。

time_init

/*
 * time_init() - set up timekeeping and install the timer interrupt.
 *
 * Seeds xtime from get_cmos_time(), switches the time-offset helpers over to
 * the TSC when cpu_has_tsc is set and calibrate_tsc() returns a non-zero
 * quotient, and finally registers irq0 for IRQ 0 via setup_irq().
 */
void __init time_init(void)
    {
        extern int x86_udelay_tsc;
        
        /* Wall clock: seconds come from get_cmos_time(), microseconds start at 0. */
        xtime.tv_sec = get_cmos_time();
        xtime.tv_usec = 0;
         dodgy_tsc();
         
        if (cpu_has_tsc) {
                unsigned long tsc_quotient = calibrate_tsc();
                /* A zero quotient leaves all TSC-based paths disabled. */
                if (tsc_quotient) {
                        fast_gettimeoffset_quotient = tsc_quotient;
                        use_tsc = 1;
                        x86_udelay_tsc = 1;
    #ifndef do_gettimeoffset
                        do_gettimeoffset = do_fast_gettimeoffset;
    #endif
                        do_get_fast_time = do_gettimeofday;
                        /* report CPU clock rate in Hz.
                         * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
                         * clock/second. Our precision is about 100 ppm.
                         */
                        /*
                         * divl divides the 64-bit value edx:eax (edx = 1000,
                         * eax = 0) by tsc_quotient; the quotient (eax) is
                         * stored in cpu_khz and the remainder is left in edx.
                         */
                        {        unsigned long eax=0, edx=1000;
                                __asm__("divl %2"
                                       :"=a" (cpu_khz), "=d" (edx)
                                       :"r" (tsc_quotient),
                                "0" (eax), "1" (edx));
                                printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
                        }
                }
        }
        /* Install irq0 on IRQ 0. */
        setup_irq(0, &irq0);
    }
1) xtime.tv_sec = get_cmos_time();
   xtime.tv_usec = 0;
   系统时钟涉及到两个全局变量:
   a) struct timeval xtime;
      struct timeval {
          time_t        tv_sec;
          suseconds_t        tv_usec;
      };
     xtime记录了从历史上某个时刻开始的绝对时间,数值来自一个CMOS晶片,精确到秒。get_cmos_time获取秒数并初始化全局变量xtime。
   b) jiffies
     记录了从开机以来时钟中断发生的次数。
2) setup_irq(0, &irq0);
   把IRQ 0(时钟中断请求线)的irqaction设置为irq0。注意这里的0是IRQ号,而不是CPU的中断向量号。
   static struct irqaction irq0  = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL};
   所以,时钟中断的服务程序是 timer_interrupt。

时钟中断服务子程序timer_interrupt

/*
 * timer_interrupt() - interrupt handler for the timer interrupt.
 * @irq:    interrupt request number being serviced
 * @dev_id: device cookie; not used here, NULL is forwarded instead
 * @regs:   register state saved at interrupt entry
 *
 * Takes xtime_lock for writing around do_timer_interrupt(), which does the
 * per-tick work.
 */
static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
        write_lock(&xtime_lock);
        do_timer_interrupt(irq, NULL, regs);
        write_unlock(&xtime_lock);
}
1) timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
   中断服务子程序的第一个参数irq是中断请求号,第二个参数dev_id是设备的id,第三个参数regs就是通过SAVE_ALL或者 jmp error_code 构造的寄存器现场。
2) write_lock(&xtime_lock);
   全局变量xtime需要上锁。

do_timer_interrupt

/*
 * do_timer_interrupt() - per-tick work performed by timer_interrupt().
 * @irq:    interrupt request number (not used in this excerpt)
 * @dev_id: device cookie (not used in this excerpt)
 * @regs:   register state saved at interrupt entry
 *
 * Runs the timer top half, then hands the interrupted instruction pointer to
 * the profiler unless the tick arrived in user mode.
 */
static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
        /* Top half: advance jiffies and mark the bottom halves. */
        do_timer(regs);

        /* Ticks that interrupted user mode are not profiled. */
        if (user_mode(regs))
                return;

        x86_do_profile(regs->eip);
}
1) x86_do_profile(regs->eip);
   统计信息。

时钟中断上半段do_timer

/*
 * do_timer() - top half of the timer interrupt.
 * @regs: register state saved at interrupt entry (not used in this excerpt)
 *
 * Increments jiffies, marks TIMER_BH so the bottom half runs later, and
 * additionally marks TQUEUE_BH when tq_timer has work queued.
 */
void do_timer(struct pt_regs *regs)
{
        /* Same access as the original cast expression: bump jiffies through
         * an unsigned long pointer. */
        unsigned long *tick_count = (unsigned long *)&jiffies;

        *tick_count += 1;

        mark_bh(TIMER_BH);

        if (!TQ_ACTIVE(tq_timer))
                return;

        mark_bh(TQUEUE_BH);
}
1) (*(unsigned long *)&jiffies)++;
   jiffies加1.
2) mark_bh(TIMER_BH);
   到此时钟中断‘上半部’执行完毕,激活时钟中断的Bottom Half。

时钟中断下半段 timer_bh

在sched_init中初始化了时钟中断的下半段:
    /* Excerpt from sched_init(): associate each bottom-half number with its
     * handler; TIMER_BH is the one do_timer() marks on every tick. */
    init_bh(TIMER_BH, timer_bh);
    init_bh(TQUEUE_BH, tqueue_bh);
    init_bh(IMMEDIATE_BH, immediate_bh);
/*
 * timer_bh() - handler registered for TIMER_BH (the timer bottom half).
 *
 * First catches the wall clock up via update_times(), then runs the expired
 * timers via run_timer_list().
 */
void timer_bh(void)
    {
        update_times();
        run_timer_list();
    }
update_times
/*
 * update_times() - catch the wall clock and load average up with jiffies.
 *
 * Computes how many ticks have elapsed since wall_jiffies was last advanced,
 * applies them to xtime under xtime_lock, and then feeds the same tick count
 * to calc_load().
 */
static inline void update_times(void)
{
        unsigned long ticks;

        write_lock_irq(&xtime_lock);
        ticks = jiffies - wall_jiffies;
        if (ticks) {
                wall_jiffies += ticks;
                update_wall_time(ticks);
        }
        write_unlock_irq(&xtime_lock);

        /*
         * Load-average accounting: calc_load() only touches its own countdown
         * and avenrun[], so it runs after xtime_lock has been released.
         */
        calc_load(ticks);
}
1) ticks = jiffies - wall_jiffies;
   wall_jiffies记录了墙上时间(xtime)已经更新到的jiffies值。
   差值ticks是需要更新的心跳数。
2) update_wall_time(ticks);
   校准更新时钟xtime
3) calc_load(ticks);
   计算load
update_wall_time
/*
 * update_wall_time() - advance xtime by the given number of ticks.
 * @ticks: number of ticks to apply
 *
 * Calls update_wall_time_one_tick() once per tick (and at least once), then
 * carries a full second out of tv_usec into tv_sec and notifies
 * second_overflow() when the microsecond field has reached 1000000.
 */
static void update_wall_time(unsigned long ticks)
{
        /* Post-tested loop: the body always runs before the count is checked. */
        do {
                update_wall_time_one_tick();
        } while (--ticks);

        if (xtime.tv_usec < 1000000)
                return;

        /* Carry one second out of the microsecond field. */
        xtime.tv_usec -= 1000000;
        xtime.tv_sec++;
        second_overflow();
}
1) update_wall_time_one_tick
   校准更新时钟xtime
calc_load
/* Load averages; the three slots are updated with EXP_1, EXP_5 and EXP_15. */
unsigned long avenrun[3];

/*
 * calc_load() - periodically refresh avenrun[].
 * @ticks: number of ticks elapsed since the previous call
 *
 * Keeps a countdown that starts at LOAD_FREQ.  Each call subtracts @ticks;
 * once the countdown drops below zero it is topped up by LOAD_FREQ and the
 * three averages are recomputed from count_active_tasks().
 */
static inline void calc_load(unsigned long ticks)
{
        static int ticks_left = LOAD_FREQ;
        unsigned long nr_active;

        ticks_left -= ticks;
        if (ticks_left >= 0)
                return;

        ticks_left += LOAD_FREQ;
        nr_active = count_active_tasks();

        CALC_LOAD(avenrun[0], EXP_1, nr_active);
        CALC_LOAD(avenrun[1], EXP_5, nr_active);
        CALC_LOAD(avenrun[2], EXP_15, nr_active);
}
run_timer_list
/*
 * run_timer_list() - run every timer that has expired up to the current jiffies.
 *
 * Called from timer_bh().  timerlist_lock is held throughout, except while a
 * timer's callback is executing.
 */
static inline void run_timer_list(void)
{
    spin_lock_irq(&timerlist_lock);
    /*
     * One iteration per tick until timer_jiffies has moved past jiffies.
     * The difference is compared as a signed value, presumably so the test
     * stays correct when the counters wrap.
     */
    while ((long)(jiffies - timer_jiffies) >= 0) {
            struct list_head *head, *curr;
            /*
             * tv1 is at slot 0: call cascade_timers() on tvecs[1], and keep
             * going to higher vectors while the one just cascaded reports
             * index 1.
             */
            if (!tv1.index) {
                    int n = 1;
                    do {
                            cascade_timers(tvecs[n]);
                    } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
            }
repeat:
            /* List head for the current tv1 slot. */
            head = tv1.vec + tv1.index;
            curr = head->next;
            if (curr != head) {
                    struct timer_list *timer;
                    void (*fn)(unsigned long);
                    unsigned long data;
                    timer = list_entry(curr, struct timer_list, list);
                    /* Copy callback and argument before the timer is unlinked. */
                     fn = timer->function;
                     data= timer->data;
                    detach_timer(timer);
                    timer->list.next = timer->list.prev = NULL;
                    timer_enter(timer);
                    /* The callback runs with timerlist_lock released. */
                    spin_unlock_irq(&timerlist_lock);
                    fn(data);
                    spin_lock_irq(&timerlist_lock);
                    timer_exit();
                    /* Re-read the same slot until its list is empty. */
                    goto repeat;
            }
            /* Slot drained: move on to the next tick and the next tv1 slot. */
            ++timer_jiffies; 
            tv1.index = (tv1.index + 1) & TVR_MASK;
    }
    spin_unlock_irq(&timerlist_lock);
}
1) run_timer_list(void)
       完成系统中的定时器任务。
    2) struct timer_list {
           struct list_head list;
           unsigned long expires;
           unsigned long data;
           void (*function)(unsigned long);
       };
    3) ++timer_jiffies;
       当前槽位(tv1.index)上所有到期的timer都通过goto repeat依次执行完之后,timer_jiffies才加1,转而处理下一个tick;所以一个tick内可以执行多个timer。
    4) spin_unlock_irq(&timerlist_lock);
       fn(data);
       spin_lock_irq(&timerlist_lock);
       执行timer的回调函数fn期间会释放timerlist_lock,回调返回后再重新上锁。
上一篇:内核代码阅读(20) - 进程


下一篇:内核代码阅读(19) - 系统调用trap