Linux5.4 Qcom平台
1. per-cpu的 update_util_data 实例是调度器与schedutil调频governor沟通的桥梁,cpufreq_update_util()函数中通过访问其中的回调函数(data->func)来触发调频。
2. 触发调频的时机
(1) enqueue_task_fair 时
activate_task move_queued_task do_set_cpus_allowed rt_mutex_setprio set_user_nice __sched_setscheduler sched_setnuma sched_move_task enqueue_task_core enqueue_task --> enqueue_task_fair --> if (p->in_iowait) cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
(2) pick_next_task_fair时
balance_fair pick_next_task_fair newidle_balance --> nohz_newidle_balance --> _nohz_idle_balance --> rebalance_domains --> load_balance --> find_busiest_group --> update_sd_lb_stats --> update_sg_lb_stats --> update_nohz_stats --> update_blocked_averages --> cpufreq_update_util(rq, 0); newidle_balance --> nohz_newidle_balance --> _nohz_idle_balance --> update_nohz_stats --> update_blocked_averages --> cpufreq_update_util(rq, 0); newidle_balance --> load_balance --> find_busiest_group --> update_sd_lb_stats --> update_sg_lb_stats --> update_nohz_stats --> update_blocked_averages --> cpufreq_update_util(rq, 0);
(3) scheduler_tick时
scheduler_tick() --> trigger_load_balance //只有在 scheduler_tick()中通过 SCHED_SOFTIRQ 触发负载均衡。 __init init_sched_fair_class --> open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); trigger_load_balance --> raise_softirq(SCHED_SOFTIRQ); run_rebalance_domains --> nohz_idle_balance --> _nohz_idle_balance --> update_nohz_stats --> update_blocked_averages --> cpufreq_update_util(rq, 0); run_rebalance_domains --> nohz_idle_balance --> _nohz_idle_balance --> rebalance_domains --> load_balance --> find_busiest_group --> update_sd_lb_stats --> update_sg_lb_stats --> update_nohz_stats --> update_blocked_averages --> cpufreq_update_util(rq, 0); run_rebalance_domains --> update_blocked_averages --> cpufreq_update_util(rq, 0); run_rebalance_domains --> rebalance_domains --> load_balance --> find_busiest_group --> update_sd_lb_stats --> update_sg_lb_stats --> update_nohz_stats --> update_blocked_averages --> cpufreq_update_util(rq, 0);
(4) switched_to回调
switched_to_fair --> attach_task_cfs_rq --> attach_entity_cfs_rq --> attach_entity_load_avg --> cfs_rq_util_change(cfs_rq, flags) --> cpufreq_update_util(rq, flags)
(5) task_change_group回调
task_change_group_fair --> task_move_group_fair --> attach_task_cfs_rq --> attach_entity_cfs_rq --> attach_entity_load_avg --> cfs_rq_util_change(cfs_rq, flags) --> cpufreq_update_util(rq, flags)
(6) 唤醒进程时
各种锁,进程间通信机制调用wake_up_q _do_fork wake_up_new_task --> post_init_entity_util_avg --> attach_entity_cfs_rq --> attach_entity_load_avg --> cfs_rq_util_change(cfs_rq, flags) --> cpufreq_update_util(rq, flags)
(7) sched_online_group调用时
sched_online_group --> online_fair_sched_group --> attach_entity_cfs_rq --> attach_entity_load_avg --> cfs_rq_util_change(cfs_rq, flags) --> cpufreq_update_util(rq, flags)
(8) migrate_task_rq回调时
migrate_task_rq_fair --> detach_entity_cfs_rq --> detach_entity_load_avg --> cfs_rq_util_change(cfs_rq, flags) --> cpufreq_update_util(rq, flags)
(9) switched_from回调时
switched_from_fair --> detach_task_cfs_rq --> detach_entity_cfs_rq --> detach_entity_load_avg --> cfs_rq_util_change(cfs_rq, flags) --> cpufreq_update_util(rq, flags)
(10) task_change_group回调时(detach路径)
task_change_group_fair --> task_move_group_fair --> detach_task_cfs_rq --> detach_entity_cfs_rq --> detach_entity_load_avg --> cfs_rq_util_change(cfs_rq, flags) --> cpufreq_update_util(rq, flags)
(11) 往rq中添加/移除任务时
enqueue_entity dequeue_entity set_next_entity put_prev_entity entity_tick enqueue_task_fair dequeue_task_fair __update_blocked_fair propagate_entity_cfs_rq detach_entity_cfs_rq attach_entity_cfs_rq sched_group_set_shares update_load_avg --> cfs_rq_util_change(cfs_rq, flags) --> cpufreq_update_util(rq, flags) update_load_avg --> attach_entity_load_avg --> cfs_rq_util_change(cfs_rq, flags) --> cpufreq_update_util(rq, flags)
(12) scheduler_tick 中直接调用调频
scheduler_tick --> cpufreq_update_util
3. 以调度节拍为例介绍调频
(1) 调用路径
时钟中断 --> scheduler_tick --> curr->sched_class->task_tick(CFS:task_tick_fair) --> entity_tick --> update_load_avg --> cfs_rq_util_change --> cpufreq_update_util(rq, flags)
(2) 在使能 CONFIG_SCHED_WALT 时,cpufreq_update_util(rq, flags) 被Qcom改为只响应 flags 中设置了 SCHED_CPUFREQ_WALT 标志的调频请求,其它调用直接返回;因此第2节中传入 0 或仅 SCHED_CPUFREQ_IOWAIT 的路径在该配置下实际不会触发调频。
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) { struct update_util_data *data; u64 clock; #ifdef CONFIG_SCHED_WALT if (!(flags & SCHED_CPUFREQ_WALT)) /*变成只接受设置有WALT标志位的调频*/ return; clock = sched_ktime_clock(); #else clock = rq_clock(rq); #endif data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, cpu_of(rq))); if (data) data->func(data, clock, flags); }
(3) cpufreq_update_util(rq, flags) 中flags支持的调频标志位
//include/linux/sched/cpufreq.h #define SCHED_CPUFREQ_IOWAIT (1U << 0) #define SCHED_CPUFREQ_MIGRATION (1U << 1) #define SCHED_CPUFREQ_INTERCLUSTER_MIG (1U << 3) #define SCHED_CPUFREQ_WALT (1U << 4) #define SCHED_CPUFREQ_PL (1U << 5) #define SCHED_CPUFREQ_EARLY_DET (1U << 6) #define SCHED_CPUFREQ_CONTINUE (1U << 8)
4. cpufreq还与thermal的cooling device(温控降频设备)有关联
struct cpufreq_policy { ... struct thermal_cooling_device *cdev };
5. driver/governor以及governor在每个policy下的可调参数分别是
# cat /sys/devices/system/cpu/cpufreq/policy0/scaling_driver //driver qcom-cpufreq-hw # cat /sys/devices/system/cpu/cpufreq/policy0/scaling_governor //governor schedutil # ls /sys/devices/system/cpu/cpufreq/policy0/schedutil //schedutil governor在policy0下的可调参数(tunables) above_hispeed_delay down_rate_limit_us hispeed_freq hispeed_load pl rtg_boost_freq target_loads up_rate_limit_us