凌晨1点正是看代码的好时间,自勉。
内核代码阅读(17) 中分析了如何借助softirq机制实现Top Half和Bottom Half。时钟中断不仅是内核的心跳,而且很好地诠释了softirq机制是如何把时钟中断分成上下两个部分的。
时钟中断机制
时钟中断很重要,是内核的心跳。
进入内核态的途径:中断发生,异常,系统调用。
进程的调度只能在内核态运行,如果用户态的进程进入了死循环,而此时一直都没有中断,异常,系统调用,CPU岂不是一直进入不了内核执行?
时钟中断就是惟一可以预测的中断源。
时钟中断的初始化
/*
 * Kernel entry point (abridged excerpt): prints the banner, performs
 * architecture setup and command-line parsing, then runs the trap, IRQ,
 * scheduler, time and softirq initialisers in that order.
 */
asmlinkage void __init start_kernel(void)
{
	char * command_line;
	unsigned long mempages;	/* not used by the statements kept in this excerpt */
	extern char saved_command_line[];

	lock_kernel();
	printk(linux_banner);
	setup_arch(&command_line);
	printk("Kernel command line: %s\n", saved_command_line);
	parse_options(command_line);
	trap_init();
	init_IRQ();
	sched_init();
	time_init();	/* clock-interrupt initialisation */
	softirq_init();
}
1) time_init 就是时钟中断初始化函数。
time_init
/*
 * Clock-interrupt initialisation: seeds xtime from get_cmos_time(),
 * switches to TSC-based time offsets when calibration succeeds, and
 * finally installs irq0 as the action for IRQ 0.
 */
void __init time_init(void)
{
	extern int x86_udelay_tsc;

	/* Seconds come from the CMOS clock; the microsecond part starts at zero. */
	xtime.tv_sec = get_cmos_time();
	xtime.tv_usec = 0;

	dodgy_tsc();

	if (cpu_has_tsc) {
		unsigned long tsc_quotient = calibrate_tsc();
		/* A zero quotient leaves the TSC path disabled. */
		if (tsc_quotient) {
			fast_gettimeoffset_quotient = tsc_quotient;
			use_tsc = 1;
			x86_udelay_tsc = 1;
#ifndef do_gettimeoffset
			do_gettimeoffset = do_fast_gettimeoffset;
#endif
			do_get_fast_time = do_gettimeofday;

			/* report CPU clock rate in Hz.
			 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
			 * clock/second. Our precision is about 100 ppm.
			 */
			{
				unsigned long eax=0, edx=1000;
				/*
				 * divl divides the 64-bit value edx:eax (here 1000 * 2^32)
				 * by tsc_quotient; the "=a" output places the quotient
				 * in cpu_khz.
				 */
				__asm__("divl %2"
					:"=a" (cpu_khz), "=d" (edx)
					:"r" (tsc_quotient),
					 "0" (eax), "1" (edx));
				printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
			}
		}
	}

	setup_irq(0, &irq0);
}
1) xtime.tv_sec = get_cmos_time(); xtime.tv_usec = 0; 系统时钟涉及到两个全局变量: a) struct timeval xtime; struct timeval { time_t tv_sec; suseconds_t tv_usec; }; xtime记录了从历史上某个时刻开始的绝对时间,初始值来自一个CMOS晶片,精确到秒。get_cmos_time获取秒数并初始化全局变量xtime。 b) jiffies 记录了从开机以来时钟中断发生的次数。 2) setup_irq(0, &irq0); 把IRQ 0(中断请求号0,而不是CPU中断向量0)的irqaction设置为irq0。 static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL}; 所以,时钟中断的服务程序是 timer_interrupt。
时钟中断服务子程序timer_interrupt
/*
 * Handler installed for IRQ 0 through irq0: runs do_timer_interrupt()
 * while holding xtime_lock for writing.
 *
 * irq    - interrupt request number
 * dev_id - device cookie; not used here (NULL is forwarded instead)
 * regs   - saved register frame of the interrupted context
 */
static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
	write_lock(&xtime_lock);
	do_timer_interrupt(irq, NULL, regs);
	write_unlock(&xtime_lock);
}
1) timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) 中断服务子程序的第一个参数是中断请求号irq,第二个参数是设备的id(dev_id),第三个参数就是通过SAVE_ALL或者 jmp error_code 构造的寄存器现场。 2) write_lock(&xtime_lock); 全局变量xtime需要上锁。
do_timer_interrupt
/*
 * Body of the timer interrupt: runs do_timer() and, when the interrupted
 * context was not user mode, hands the interrupted EIP to x86_do_profile().
 * irq and dev_id are accepted but not used in this excerpt.
 */
static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
	do_timer(regs);

	if (!user_mode(regs))
		x86_do_profile(regs->eip);
}
1) x86_do_profile(regs->eip); 统计信息。
时钟中断上半段do_timer
/*
 * Top half of the clock interrupt: increments jiffies and marks the
 * bottom halves that finish the work later.
 */
void do_timer(struct pt_regs *regs)
{
	/*
	 * NOTE(review): the cast presumably strips a volatile qualifier from
	 * jiffies so a plain increment is emitted; confirm against its declaration.
	 */
	(*(unsigned long *)&jiffies)++;
	mark_bh(TIMER_BH);
	/* The task-queue bottom half is only marked when tq_timer is active. */
	if (TQ_ACTIVE(tq_timer))
		mark_bh(TQUEUE_BH);
}
1) (*(unsigned long *)&jiffies)++; jiffies加1. 2) mark_bh(TIMER_BH); 到此时钟中断‘上半部’执行完毕,激活时钟中断的Bottom Half。
时钟中断下半段 timer_bh
在sched_init中初始化了时钟中断的下半段: init_bh(TIMER_BH, timer_bh); init_bh(TQUEUE_BH, tqueue_bh); init_bh(IMMEDIATE_BH, immediate_bh);
/*
 * Bottom half of the clock interrupt (installed for TIMER_BH): updates the
 * time-keeping state first, then runs the timers that have come due.
 */
void timer_bh(void)
{
	update_times();
	run_timer_list();
}
update_times
/*
 * Bring the wall clock up to date with jiffies and feed the elapsed ticks
 * into the load-average calculation.
 */
static inline void update_times(void)
{
	unsigned long ticks;

	write_lock_irq(&xtime_lock);
	/* Ticks that have elapsed since the wall clock was last advanced. */
	ticks = jiffies - wall_jiffies;
	if (ticks) {
		wall_jiffies += ticks;
		update_wall_time(ticks);
	}
	write_unlock_irq(&xtime_lock);

	/* calc_load() does not touch xtime, so it runs after the lock is dropped. */
	calc_load(ticks);
}
1) ticks = jiffies - wall_jiffies; wall_jiffies记录墙上时间已经更新到了哪个jiffies值,差值ticks就是还需要补上的心跳数。 2) update_wall_time(ticks); 校准更新时钟xtime 3) calc_load(ticks); 计算load。内核源码中它在update_times的末尾、write_unlock_irq之后被调用。
update_wall_time
/*
 * Apply `ticks` ticks to xtime one at a time, then carry an overflowing
 * microsecond count into the seconds field.
 */
static void update_wall_time(unsigned long ticks)
{
	/*
	 * The do/while body runs before ticks is tested, so ticks must be
	 * non-zero on entry; update_times() only calls this when ticks != 0.
	 */
	do {
		ticks--;
		update_wall_time_one_tick();
	} while (ticks);

	/* One second's worth of microseconds rolls over into tv_sec. */
	if (xtime.tv_usec >= 1000000) {
		xtime.tv_usec -= 1000000;
		xtime.tv_sec++;
		second_overflow();
	}
}
1) update_wall_time_one_tick 校准更新时钟xtime
calc_load
/* Three load-average accumulators, updated with EXP_1, EXP_5 and EXP_15 respectively. */
unsigned long avenrun[3];

/*
 * Count down from LOAD_FREQ by the elapsed ticks; each time the countdown
 * goes negative, re-arm it and fold the current count_active_tasks() value
 * into the three averages.
 */
static inline void calc_load(unsigned long ticks)
{
	unsigned long active_tasks;
	/* Countdown persists across calls. */
	static int count = LOAD_FREQ;

	count -= ticks;
	if (count < 0) {
		count += LOAD_FREQ;
		active_tasks = count_active_tasks();
		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
	}
}
run_timer_list
/*
 * Run the timers that have come due, advancing timer_jiffies until it has
 * passed jiffies. timerlist_lock is held throughout except while a timer
 * callback is executing.
 */
static inline void run_timer_list(void)
{
	spin_lock_irq(&timerlist_lock);
	/* Signed difference keeps the comparison meaningful if jiffies wraps around. */
	while ((long)(jiffies - timer_jiffies) >= 0) {
		struct list_head *head, *curr;
		/* When tv1.index is 0, call cascade_timers() on tvecs[1] and upwards. */
		if (!tv1.index) {
			int n = 1;
			do {
				cascade_timers(tvecs[n]);
			} while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
		}
repeat:
		head = tv1.vec + tv1.index;
		curr = head->next;
		/* A non-empty slot: take its first timer. */
		if (curr != head) {
			struct timer_list *timer;
			void (*fn)(unsigned long);
			unsigned long data;

			timer = list_entry(curr, struct timer_list, list);
			/* Copy the callback and its argument before the lock is released. */
			fn = timer->function;
			data= timer->data;

			detach_timer(timer);
			timer->list.next = timer->list.prev = NULL;
			timer_enter(timer);
			/* The callback runs without timerlist_lock held. */
			spin_unlock_irq(&timerlist_lock);
			fn(data);
			spin_lock_irq(&timerlist_lock);
			timer_exit();
			/* Re-read the slot head so the whole slot is drained before advancing. */
			goto repeat;
		}
		++timer_jiffies;
		tv1.index = (tv1.index + 1) & TVR_MASK;
	}
	spin_unlock_irq(&timerlist_lock);
}
1) run_timer_list(void) 完成系统中的定时器任务。 2) struct timer_list { struct list_head list; unsigned long expires; unsigned long data; void (*function)(unsigned long); }; 3) ++timer_jiffies; 当前槽位tv1.vec[tv1.index]上的timer通过goto repeat全部执行完之后,timer_jiffies才加1;因此一个tick可以执行多个timer。 4) spin_unlock_irq(&timerlist_lock); fn(data); spin_lock_irq(&timerlist_lock); 执行一个timer的回调函数期间会解锁timerlist_lock,执行完再重新上锁。