一、打印说明
1. 打印内容
# cat /proc/sched_debug Sched Debug Version: v0.11, 5.10.xx-android12-x-xx-xxxxxxxxx #1 ktime : 20791668.206238 sched_clk : 20791683.112454 cpu_clk : 20791683.112454 jiffies : 4300090214 sysctl_sched .sysctl_sched_latency : 10.000000 .sysctl_sched_min_granularity : 3.000000 .sysctl_sched_wakeup_granularity : 2.000000 .sysctl_sched_child_runs_first : 0 .sysctl_sched_features : 16722747 //使能了哪些调度feature,见features.h .sysctl_sched_tunable_scaling : 0 (none) cpu#0 .nr_running : 0 .nr_switches : 515789 .nr_uninterruptible : -348 .next_balance : 4300.090217 .curr->pid : 0 .clock : 20791690.941377 .clock_task : 20393319.574563 .avg_idle : 681678 .max_idle_balance_cost : 347294 .yld_count : 45325 .sched_count : 558874 .sched_goidle : 174803 .ttwu_count : 2261133 .ttwu_local : 155736 cfs_rq[0]:/ .exec_clock : 150058.081435 .MIN_vruntime : 0.000001 .min_vruntime : 1032733.837701 //最小虚拟时间 .max_vruntime : 0.000001 .spread : 0.000000 .spread0 : 0.000000 .nr_spread_over : 7046 .nr_running : 0 .load : 0 //负载信息 .load_avg : 0 .runnable_avg : 0 .util_avg : 0 .util_est_enqueued : 0 .removed.load_avg : 0 .removed.util_avg : 0 .removed.runnable_avg : 0 .tg_load_avg_contrib : 0 .tg_load_avg : 0 rt_rq[0]: .rt_nr_running : 0 .rt_nr_migratory : 0 .rt_throttled : 0 .rt_time : 3.853386 .rt_runtime : 950.000000 dl_rq[0]: .dl_nr_running : 0 .dl_nr_migratory : 0 .dl_bw->bw : 996147 .dl_bw->total_bw : 0 runnable tasks: S task PID tree-key switches prio wait-time sum-exec sum-sleep ------------------------------------------------------------------------------------------------------------- I rcu_par_gp 4 8.725293 2 100 0.000000 0.009155 0.000000 / D hang_detect 152 0.000000 675 0 0.000000 87.342714 0.000000 / >R Binder:1061_1 17584 1122.927614 598 120 189.109238 1271.457995 183.664618 /foreground S Binder:799_2 844 20.058758 2 120 0.950462 0.123000 0.000000 /foreground S HwBinder:1154_1 1722 1420.876848 11 120 5.680075 1.564693 2.107003 /top-app S Binder:3472_3 3539 555381.752165 65 120 24.436231 
92.525768 20237325.907593 /background ... S irq/520-event_0 156 0.000000 4 49 0.000000 0.307768 0.000000 / //每个cpu的都进行打印,这里只保留cpu0的
2. header部分打印函数
//kernel/sched/debug.c static void sched_debug_header(struct seq_file *m) { u64 ktime, sched_clk, cpu_clk; unsigned long flags; local_irq_save(flags); ktime = ktime_to_ns(ktime_get()); sched_clk = sched_clock(); cpu_clk = local_clock(); local_irq_restore(flags); SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n", init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); #define P(x) \ SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x)) #define PN(x) \ SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) PN(ktime); PN(sched_clk); PN(cpu_clk); P(jiffies); #undef PN #undef P SEQ_printf(m, "\n"); SEQ_printf(m, "sysctl_sched\n"); #define P(x) \ SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x)) #define PN(x) \ SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) PN(sysctl_sched_latency); PN(sysctl_sched_min_granularity); PN(sysctl_sched_wakeup_granularity); P(sysctl_sched_child_runs_first); P(sysctl_sched_features); #undef PN #undef P SEQ_printf(m, " .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling", sysctl_sched_tunable_scaling, sched_tunable_scaling_names[sysctl_sched_tunable_scaling]); SEQ_printf(m, "\n"); } static int sched_debug_show(struct seq_file *m, void *v) { int cpu = (unsigned long)(v - 2); if (cpu != -1) print_cpu(m, cpu); else sched_debug_header(m); return 0; }
3. cpu#0 下的打印
static void print_cpu(struct seq_file *m, int cpu) { struct rq *rq = cpu_rq(cpu); SEQ_printf(m, "cpu#%d\n", cpu); #define P(x) \ do { \ if (sizeof(rq->x) == 4) \ SEQ_printf(m, " .%-30s: %ld\n", #x, (long)(rq->x)); \ else \ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rq->x));\ } while (0) #define PN(x) \ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x)) P(nr_running); P(nr_switches); P(nr_uninterruptible); //以long类型打印unsinged long PN(next_balance); SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr))); PN(clock); PN(clock_task); #undef P #undef PN #ifdef CONFIG_SMP #define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n); P64(avg_idle); P64(max_idle_balance_cost); #undef P64 #endif #define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, schedstat_val(rq->n)); if (schedstat_enabled()) { P(yld_count); P(sched_count); P(sched_goidle); P(ttwu_count); P(ttwu_local); } #undef P /*下面分别是"cfs_rq[0]:"、"rt_rq[0]:"、"dl_rq[0]:"下的打印*/ print_cfs_stats(m, cpu); print_rt_stats(m, cpu); print_dl_stats(m, cpu); print_rq(m, rq, cpu); SEQ_printf(m, "\n"); }
4. cfs_rq[0]: 下的打印
void print_cfs_stats(struct seq_file *m, int cpu) { struct cfs_rq *cfs_rq, *pos; rcu_read_lock(); //对于rq->leaf_cfs_rq_list上的每一个叶子cfs_rq都调用,若没有使能组调度,就只打印 rq->cfs_rq for_each_leaf_cfs_rq_safe(cpu_rq(cpu), cfs_rq, pos) print_cfs_rq(m, cpu, cfs_rq); rcu_read_unlock(); }
如果 CFS 要支持组调度(CONFIG_FAIR_GROUP_SCHED),就需要把本 CPU 上所有的 cfs_rq 串到同一个链表中:rq->leaf_cfs_rq_list 是链表头,本 CPU 就绪队列下的各个 cfs_rq 通过自己的 leaf_cfs_rq_list 成员挂到这个链表上。cfs_rq 里还有一个成员 on_list,用来表示该 cfs_rq 当前是否已经通过 leaf_cfs_rq_list 成员挂在 rq->leaf_cfs_rq_list 链表中。
void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, spread, rq0_min_vruntime, spread0; struct rq *rq = cpu_rq(cpu); struct sched_entity *last; unsigned long flags; #ifdef CONFIG_FAIR_GROUP_SCHED SEQ_printf(m, "\n"); SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu); #else SEQ_printf(m, "\n"); SEQ_printf(m, "cfs_rq[%d]:\n", cpu); #endif SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", SPLIT_NS(cfs_rq->exec_clock)); //格式:ms.ns raw_spin_lock_irqsave(&rq->lock, flags); if (rb_first_cached(&cfs_rq->tasks_timeline)) MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime; last = __pick_last_entity(cfs_rq); if (last) max_vruntime = last->vruntime; min_vruntime = cfs_rq->min_vruntime; rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime; raw_spin_unlock_irqrestore(&rq->lock, flags); SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", SPLIT_NS(MIN_vruntime)); SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime", SPLIT_NS(min_vruntime)); SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "max_vruntime", SPLIT_NS(max_vruntime)); spread = max_vruntime - MIN_vruntime; SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread)); spread0 = min_vruntime - rq0_min_vruntime; SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", SPLIT_NS(spread0)); SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", cfs_rq->nr_spread_over); SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running); SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); #ifdef CONFIG_SMP SEQ_printf(m, " .%-30s: %lu\n", "load_avg", cfs_rq->avg.load_avg); SEQ_printf(m, " .%-30s: %lu\n", "runnable_avg", cfs_rq->avg.runnable_avg); SEQ_printf(m, " .%-30s: %lu\n", "util_avg", cfs_rq->avg.util_avg); SEQ_printf(m, " .%-30s: %u\n", "util_est_enqueued", cfs_rq->avg.util_est.enqueued); SEQ_printf(m, " .%-30s: %ld\n", "removed.load_avg", cfs_rq->removed.load_avg); SEQ_printf(m, " .%-30s: %ld\n", "removed.util_avg", cfs_rq->removed.util_avg); 
SEQ_printf(m, " .%-30s: %ld\n", "removed.runnable_avg", cfs_rq->removed.runnable_avg); #ifdef CONFIG_FAIR_GROUP_SCHED SEQ_printf(m, " .%-30s: %lu\n", "tg_load_avg_contrib", cfs_rq->tg_load_avg_contrib); SEQ_printf(m, " .%-30s: %ld\n", "tg_load_avg", atomic_long_read(&cfs_rq->tg->load_avg)); #endif #endif #ifdef CONFIG_CFS_BANDWIDTH SEQ_printf(m, " .%-30s: %d\n", "throttled", cfs_rq->throttled); SEQ_printf(m, " .%-30s: %d\n", "throttle_count", cfs_rq->throttle_count); #endif #ifdef CONFIG_FAIR_GROUP_SCHED print_cfs_group_stats(m, cpu, cfs_rq->tg); //task_group 在此cpu上对应的se #endif } static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg) { struct sched_entity *se = tg->se[cpu]; //task_group 在此cpu上对应的se #define P(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F) #define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)schedstat_val(F)) #define PN(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F)) #define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F))) if (!se) return; PN(se->exec_start); PN(se->vruntime); PN(se->sum_exec_runtime); if (schedstat_enabled()) { PN_SCHEDSTAT(se->statistics.wait_start); PN_SCHEDSTAT(se->statistics.sleep_start); PN_SCHEDSTAT(se->statistics.block_start); PN_SCHEDSTAT(se->statistics.sleep_max); PN_SCHEDSTAT(se->statistics.block_max); PN_SCHEDSTAT(se->statistics.exec_max); PN_SCHEDSTAT(se->statistics.slice_max); PN_SCHEDSTAT(se->statistics.wait_max); PN_SCHEDSTAT(se->statistics.wait_sum); P_SCHEDSTAT(se->statistics.wait_count); } P(se->load.weight); #ifdef CONFIG_SMP P(se->avg.load_avg); P(se->avg.util_avg); P(se->avg.runnable_avg); #endif #undef PN_SCHEDSTAT #undef PN #undef P_SCHEDSTAT #undef P }
5. rt_rq[0] 下的打印
void print_rt_stats(struct seq_file *m, int cpu) { rt_rq_iter_t iter; struct rt_rq *rt_rq; rcu_read_lock(); for_each_rt_rq(rt_rq, iter, cpu_rq(cpu)) print_rt_rq(m, cpu, rt_rq); rcu_read_unlock(); } void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) { SEQ_printf(m, "\n"); SEQ_printf(m, "rt_rq[%d]:\n", cpu); #define P(x) \ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) #define PU(x) \ SEQ_printf(m, " .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x)) #define PN(x) \ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x)) PU(rt_nr_running); #ifdef CONFIG_SMP PU(rt_nr_migratory); #endif P(rt_throttled); PN(rt_time); PN(rt_runtime); #undef PN #undef PU #undef P }
原生内核使能了 CONFIG_FAIR_GROUP_SCHED,却没有使能 CONFIG_RT_GROUP_SCHED,因此 cfs_rq 会按任务组逐个打印(标题中带有组路径),而每个 cpu 只会打印一个 rt_rq。
6. dl_rq[0] 下的打印
void print_dl_stats(struct seq_file *m, int cpu) { print_dl_rq(m, cpu, &cpu_rq(cpu)->dl); } void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq) { struct dl_bw *dl_bw; SEQ_printf(m, "\n"); SEQ_printf(m, "dl_rq[%d]:\n", cpu); #define PU(x) \ SEQ_printf(m, " .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x)) PU(dl_nr_running); PU(dl_nr_migratory); dl_bw = &cpu_rq(cpu)->rd->dl_bw; SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw); SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw); #undef PU }
7. runnable tasks: 下的打印
static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) { struct task_struct *g, *p; SEQ_printf(m, "\n"); SEQ_printf(m, "runnable tasks:\n"); SEQ_printf(m, " S task PID tree-key switches prio" " wait-time sum-exec sum-sleep\n"); SEQ_printf(m, "-------------------------------------------------------" "------------------------------------------------------\n"); rcu_read_lock(); for_each_process_thread(g, p) { if (task_cpu(p) != rq_cpu) //对于 task_cpu(p) == rq_cpu 的每一个线程都打印 continue; print_task(m, rq, p); } rcu_read_unlock(); } static void print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) { if (rq->curr == p) SEQ_printf(m, ">R"); else SEQ_printf(m, " %c", task_state_to_char(p)); //此CPU上的所有任务,包括睡眠的 SEQ_printf(m, " %15s %5d %9Ld.%06ld %9Ld %5d ", p->comm, task_pid_nr(p), SPLIT_NS(p->se.vruntime), //格式: ms.ns (long long)(p->nvcsw + p->nivcsw), //主动放弃cpu+被抢占 p->prio); SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld", SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)), //总等待时间 SPLIT_NS(p->se.sum_exec_runtime), //总执行时间 SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime))); //纯休眠时间(sleep+D) #ifdef CONFIG_CGROUP_SCHED SEQ_printf_task_group_path(m, task_group(p), " %s") //task的cgroup分组 #endif SEQ_printf(m, "\n"); }