转载自 https://blog.csdn.net/hzj_001/article/details/100054659
主体涉及到了3个机制:kernel watchdog线程,高精度定时器(时钟中断),基于PMU硬件perf event的NMI(不可屏蔽中断)。
基本思想:
1.)(soft lockup):抢占被长时间关闭而导致其余进程无法调度
2.)(hard lockup):中断被长时间关闭而导致CPU无法响应中断(包括时钟中断)
softlockup基本原理:
1)SoftLockup 检测首先需要对每一个CPU core注册叫做watchdog的kernel线程。即[watchdog/0],[watchdog/1],[watchdog/2]…
2)同时,系统会有一个高精度的计时器hrtimer,该计时器能定期产生时钟中断,该中断对应的中断回调函数是watchdog_timer_fn();此中断回调函数主要做3件事:
a.watchdog_interrupt_count函数更新hrtimer_interrupts变量(判断hardlockup会用)
b.wake_up_process唤醒watchdog线程(更新时间戳)
c.is_softlockup判断是否出现了soft_lockup
soft lock detector会检查时间戳,如果超过soft lockup threshold一直未更新,说明[watchdog/x]未得到运行机会,意味着CPU被霸占,也就是发生了soft lockup。
注意,这里面的内核线程[watchdog/x]的目的是更新时间戳,该时间戳是被watch的对象。而真正的看门狗,则是由时钟中断触发的 watchdog_timer_fn(),这里面 [watchdog/x]是被scheduler调用执行的,而watchdog_timer_fn()则是被中断触发的。
hardlockup基本原理:(也可参照另一篇https://blog.csdn.net/hzj_001/article/details/95059760)
1)注册一个基于PMU硬件的perf event,每经过watchdog_thresh(/proc/sys/kernel/watchdog_thresh)秒的时间会触发一次NMI中断
2)NMI中断处理函数通过检测相邻两次NMI中断之间hrtimer_interrupts的值(与上次保存的hrtimer_interrupts_saved比较)是否发生变化来判断是否发生hardlockup:如果没有变化,说明这段时间内时钟中断没有得到响应
3)保存中断计数hrtimer_interrupts_saved=hrtimer_interrupts
1.watchdog线程
系统会为每个cpu 注册一个一般的kernel线程,名字叫watchdog/0, watchdog/1...以此类推。
主要作用:将当前cpu时间戳,更新至watchdog_touch_ts
2.watchdog_enable会注册一个高精度定时器,通过时钟中断响应函数来实现一些看门狗功能
1)注册一个高精度时钟定时器
/*
 * watchdog_enable - set up the watchdog machinery for one CPU.
 * @cpu: CPU number handed to watchdog_nmi_enable().
 *
 * Initializes this CPU's watchdog hrtimer with watchdog_timer_fn as its
 * callback, enables the NMI/perf side, starts the timer with a relative,
 * pinned expiry of sample_period, and finally sets the scheduling
 * policy/priority and touches the watchdog timestamp.
 */
static void watchdog_enable(unsigned int cpu)
{
	struct hrtimer *timer = &__raw_get_cpu_var(watchdog_hrtimer);

	/* Prepare the timer that kicks the hardlockup detector. */
	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	timer->function = watchdog_timer_fn;

	/* Turn on the perf event. */
	watchdog_nmi_enable(cpu);

	/* Started here because hrtimer_start can only pin to smp_processor_id(). */
	hrtimer_start(timer, ns_to_ktime(sample_period), HRTIMER_MODE_REL_PINNED);

	/* Switch to SCHED_FIFO at MAX_RT_PRIO - 1, then initialize the timestamp. */
	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
	__touch_watchdog();
}
2)响应时钟中断,通过响应高精度时钟中断处理函数hrtimer_interrupt来执行中断回调函数watchdog_timer_fn。
/* watchdog kicker functions */
/*
 * watchdog_timer_fn - hrtimer callback installed by watchdog_enable().
 * @hrtimer: the timer that fired; it is forwarded by sample_period here.
 *
 * On each expiry this bumps the interrupt counter used by the hardlockup
 * check, wakes this CPU's watchdog thread, and then uses the per-CPU
 * watchdog_touch_ts timestamp to decide whether a soft lockup must be
 * reported.
 *
 * Returns HRTIMER_NORESTART when watchdog_park_in_progress is non-zero,
 * HRTIMER_RESTART on every other path.
 */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);//read this CPU's watchdog_touch_ts; per the original note it is refreshed whenever the watchdog kernel thread gets scheduled
struct pt_regs *regs = get_irq_regs();
int duration;
int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
if (atomic_read(&watchdog_park_in_progress) != 0)
return HRTIMER_NORESTART;
/* kick the hardlockup detector */
watchdog_interrupt_count();//update the interrupt count
/* kick the softlockup detector */
wake_up_process(__this_cpu_read(softlockup_watchdog));//wake up the watchdog thread
/* .. and repeat */
hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));//re-arm the timer
if (touch_ts == 0) {/* on the first run the watchdog_touch_ts timestamp may be zero, so touch_ts must be refreshed to the current timestamp */
if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
/*
* If the time stamp was touched atomically
* make sure the scheduler tick is up to date.
*/
__this_cpu_write(softlockup_touch_sync, false);
sched_clock_tick();
}
/* Clear the guest paused flag on watchdog reset */
kvm_check_and_clear_guest_paused();
__touch_watchdog();//update watchdog_touch_ts
return HRTIMER_RESTART;
}
/* check for a softlockup
* This is done by making sure a high priority task is
* being scheduled. The task touches the watchdog to
* indicate it is getting cpu time. If it hasn't then
* this is a good indication some task is hogging the cpu
*/
/* detect whether the system has gone without scheduling for too long (20s per the original note) and decide what to do */
duration = is_softlockup(touch_ts);//decide whether a soft lockup occurred, i.e. whether touch_ts (the timestamp) has not been updated for longer than the threshold
if (unlikely(duration)) {
/*
* If a virtual machine is stopped by the host it can look to
* the watchdog like a soft lockup, check to see if the host
* stopped the vm before we issue the warning
*/
if (kvm_check_and_clear_guest_paused())
return HRTIMER_RESTART;
/* only warn once */
if (__this_cpu_read(soft_watchdog_warn) == true) {
/*
* soft_watchdog_warn is set once a watchdog timeout has already
* been reported. The intent is to warn only once for the same
* stuck task; if the stuck task changes, the flag is reset to
* false so that a warning can be raised again.
*/
/*
* When multiple processes are causing softlockups the
* softlockup detector only warns on the first one
* because the code relies on a full quiet cycle to
* re-arm. The second process prevents the quiet cycle
* and never gets reported. Use task pointers to detect
* this.
*/
if (__this_cpu_read(softlockup_task_ptr_saved) !=
current) {
__this_cpu_write(soft_watchdog_warn, false);
__touch_watchdog();
}
return HRTIMER_RESTART;
}
if (softlockup_all_cpu_backtrace) {
/* Prevent multiple soft-lockup reports if one cpu is already
* engaged in dumping cpu back traces
*/
if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
/* Someone else will report us. Let's give up */
__this_cpu_write(soft_watchdog_warn, true);
return HRTIMER_RESTART;
}
}
/* everything above decides whether this is a genuine soft lockup; from here on the relevant diagnostics are dumped */
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
__this_cpu_write(softlockup_task_ptr_saved, current);
print_modules();
print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
if (softlockup_all_cpu_backtrace) {
/* Avoid generating two back traces for current
* given that one is already made above
*/
trigger_allbutself_cpu_backtrace();
clear_bit(0, &soft_lockup_nmi_warn);
/* Barrier to sync with other cpus */
smp_mb__after_atomic();
}
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
if (softlockup_panic)//panic if softlockup_panic is configured (settable through proc)
panic("softlockup: hung tasks");
__this_cpu_write(soft_watchdog_warn, true); //a timeout (soft lockup) has now been reported once
} else
__this_cpu_write(soft_watchdog_warn, false);
return HRTIMER_RESTART;
}
3.watchdog_enable中会通过watchdog_nmi_enable注册一个基于PMU硬件的perf event,通过NMI中断回调函数watchdog_overflow_callback检测hardlockup
这个硬件在x86里叫performance monitoring,这个硬件有一个功能就是在cpu clock经过了多少个周期后发出一个NMI中断出来。
1)注册perf事件
/*
 * watchdog_nmi_enable - create the perf event used for hard-lockup checks.
 * @cpu: CPU the kernel counter is created for.
 *
 * Sets the sample period of wd_hw_attr from watchdog_thresh and creates a
 * kernel perf counter whose overflow handler is
 * watchdog_overflow_callback().
 *
 * NOTE(review): this listing looks abridged. `event` is not declared
 * anywhere in the excerpt and the function, declared to return int, has no
 * return statement, so it does not compile as shown.
 */
static int watchdog_nmi_enable(unsigned int cpu)
{
struct perf_event_attr *wd_attr;
wd_attr = &wd_hw_attr;
/* counter period derived from watchdog_thresh */
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
/* Try to register using hardware perf events */
event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
}
2)设置定时时间sample_period
/*
 * hw_nmi_get_sample_period - sample period for the watchdog perf event.
 * @watchdog_thresh: multiplier applied to the per-second cycle count.
 *
 * Returns cpu_khz * 1000 * @watchdog_thresh, computed in 64 bits.
 */
u64 hw_nmi_get_sample_period(int watchdog_thresh)
{
	/* cpu_khz is in kHz, so scaling by 1000 yields cycles per second. */
	u64 cycles_per_sec = (u64)(cpu_khz) * 1000;

	return cycles_per_sec * watchdog_thresh;
}
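在这里,根据当前cpu的频率(cpu_khz * 1000即每秒的周期数),算出watchdog_thresh秒内cpu clock经过的周期数,作为perf event的采样周期;也就是每隔watchdog_thresh秒(默认为10秒)触发一次NMI中断。注意20s是softlockup的默认阈值(2 * watchdog_thresh),不要与这里的NMI周期混淆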
3)响应NMI中断,执行中断回调函数watchdog_overflow_callback,检测hardlockup
/*
 * watchdog_overflow_callback - overflow handler of the watchdog perf event.
 * @event: perf event that overflowed (unused here).
 * @data:  sample data (unused here).
 * @regs:  register state at the time of the event (unused here).
 *
 * Does nothing unless is_hardlockup() reports a lockup. In that case it
 * either panics (when hardlockup_panic is set) or emits a WARN naming the
 * current CPU.
 */
static void watchdog_overflow_callback(struct perf_event *event,
				       struct perf_sample_data *data,
				       struct pt_regs *regs)
{
	int this_cpu;

	/* Nothing to report while the timer interrupt keeps making progress. */
	if (!is_hardlockup())
		return;

	this_cpu = smp_processor_id();
	if (hardlockup_panic)
		panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
	else
		WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
}
这个函数主要就是调用is_hardlockup
/* watchdog detector functions */
/*
 * is_hardlockup - check whether the hrtimer interrupt count has stalled.
 *
 * Compares this CPU's hrtimer_interrupts with the value remembered in
 * hrtimer_interrupts_saved. If they are equal the counter did not advance
 * since the previous check and true is returned; otherwise the saved value
 * is refreshed and false is returned.
 */
static bool is_hardlockup(void)
{
	unsigned long current_count = __this_cpu_read(hrtimer_interrupts);
	bool stalled = (__this_cpu_read(hrtimer_interrupts_saved) == current_count);

	/* Only remember the new count when progress was observed. */
	if (!stalled)
		__this_cpu_write(hrtimer_interrupts_saved, current_count);

	return stalled;
}
这个函数主要就是查看hrtimer_interrupts变量在时钟中断处理函数里有没有被更新。假如没有更新,就意味着中断出了问题,可能被错误代码长时间的关中断了。