1.调度器
内核中用来安排进程执行的模块称为调度器(scheduler),它可以切换进程状态.
调度器是CPU(处理器)的管理员,主要负责完成两件事情:
- 选择某些就绪进程来执行
- 打断某些执行的进程让它们变为就绪状态
调度器分配CPU时间的基本依据就是进程的优先级。上下文切换(context switch):将CPU从一个进程切换到另一个进程执行的过程,由内核承担此任务,负责保存被切换出进程的CPU状态,并重建(恢复)被切换入进程此前保存的CPU状态
2.调度类分析
sched_class结构体表示调度类,定义在kernel/sched/sched.h
成员分析
enqueue_task:向就绪队列添加一个进程,当某个任务进入运行状态,该函数将会被调用,将调度的实体放入到红黑树中
dequeue_task:将一个进程从就绪队列中删除,当某个任务退出可运行状态时,调用该函数,从红黑树中移除对应的调度实体
yield_task:在进程想要自愿放弃对处理器的控制权的时候,可使用sched_yield系统调用,该系统调用会在内核中调用此函数来处理
check_preempt_curr:检查当前运行的任务是否可以被(新任务)抢占
pick_next_task:选择要运行的最合适的进程
put_prev_task:用另一个进程代替当前运行的进程
set_curr_task:当任务修改其调度类,或修改它的任务组时,将调用这个函数
task_tick:每次激活周期调度器时,由周期性调度器使用
// struct sched_class: the set of operations (function pointers) that one
// scheduling class provides. Each member below is one hook of that class.
struct sched_class {
// Several scheduling classes exist in the system; they are kept in a list
// ordered by scheduling priority, and next points to the following class.
const struct sched_class *next;
// Add task p to the run queue, i.e. store its scheduling entity in the
// red-black tree, and increment nr_running.
// NOTE(review): the red-black tree wording describes CFS; other classes
// presumably use their own queue structures — confirm.
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
// Remove task p from the run queue and decrement nr_running.
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
// Give up the CPU voluntarily. Per the original note the scheduling entity
// ends up at the rightmost position of the red-black tree.
// NOTE(review): the original note was partly garbled (presumably
// "dequeue, then enqueue") — verify against the class implementation.
void (*yield_task) (struct rq *rq);
// NOTE(review): presumably yields in favour of the specific task p and
// reports success via the bool result — not described in these notes, confirm.
bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);
// Check whether the currently running task can be preempted by a new task p.
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
/*
* It is the responsibility of the pick_next_task() method that will
* return the next task to call put_prev_task() on the @prev task or
* something equivalent.
*
* May return RETRY_TASK when it finds a higher prio class has runnable
* tasks.
*/
// Select the next task that should run.
struct task_struct * (*pick_next_task) (struct rq *rq,
struct task_struct *prev);
// Put task p back onto the run queue.
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
#ifdef CONFIG_SMP
// Choose a suitable CPU for task p.
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
// Migrate task p to another CPU.
void (*migrate_task_rq)(struct task_struct *p);
// Used when waking up a task.
// NOTE(review): task_woken is presumably the matching post-wakeup hook — confirm.
void (*task_waking) (struct task_struct *task);
void (*task_woken) (struct rq *this_rq, struct task_struct *task);
// Change the CPU affinity of task p to newmask.
void (*set_cpus_allowed)(struct task_struct *p,
const struct cpumask *newmask);
// Bring the run queue online (enable it).
void (*rq_online)(struct rq *rq);
// Take the run queue offline (disable it).
void (*rq_offline)(struct rq *rq);
#endif
// Called when a task changes its scheduling class or its task group.
void (*set_curr_task) (struct rq *rq);
// Called from the timer tick; may cause a task switch by driving preemption
// of the currently running task.
void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
// Called when a task is created; initialisation differs between
// scheduling policies.
void (*task_fork) (struct task_struct *p);
// Called when a task exits.
void (*task_dead) (struct task_struct *p);
/*
* The switched_from() call is allowed to drop rq->lock, therefore we
* cannot assume the switched_from/switched_to pair is serialized by
* rq->lock. They are however serialized by p->pi_lock.
*/
// Used for task switching operations.
// NOTE(review): presumably these fire when a task leaves / enters this
// scheduling class rather than on every context switch — confirm.
void (*switched_from) (struct rq *this_rq, struct task_struct *task);
void (*switched_to) (struct rq *this_rq, struct task_struct *task);
// Handles a change of the task's priority; oldprio carries the previous value.
void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
int oldprio);
// NOTE(review): presumably returns the round-robin time slice of task — confirm.
unsigned int (*get_rr_interval) (struct rq *rq,
struct task_struct *task);
// NOTE(review): presumably refreshes runtime accounting of the running task — confirm.
void (*update_curr) (struct rq *rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
// NOTE(review): presumably called when task p is moved to another task group — confirm.
void (*task_move_group) (struct task_struct *p);
#endif
};
3.调度类
Linux调度类:dl_sched_class、rt_sched_class、fair_sched_class及idle_sched_class等。每一个进程都对应一种调度策略,每一种调度策略又对应一种调度类,每一个调度类可以对应多种调度策略.
SCHED_FIFO调度策略的实时进程永远比SCHED_NORMAL调度策略的普通进程优先级高
调度类的优先级顺序:
stop_sched_class > dl_sched_class > rt_sched_class > fair_sched_class > idle_sched_class
// Linux kernel scheduling policy source: /include/uapi/linux/sched.h
// NOTE(review): the extern declarations below presumably live in
// kernel/sched/sched.h alongside struct sched_class — confirm.
extern const struct sched_class stop_sched_class;
extern const struct sched_class dl_sched_class;
// Real-time scheduler; scheduling policies: SCHED_FIFO, SCHED_RR
extern const struct sched_class rt_sched_class;
// Completely fair scheduler (CFS); scheduling policies: SCHED_NORMAL, SCHED_BATCH, etc.
extern const struct sched_class fair_sched_class;
extern const struct sched_class idle_sched_class;
//优先级最高,会中断所有的其他进程,并且不会被其他的任务打断
const struct sched_class stop_sched_class = {
.next = &dl_sched_class,
.enqueue_task = enqueue_task_stop,
.dequeue_task = dequeue_task_stop,
.yield_task = yield_task_stop,
.check_preempt_curr = check_preempt_curr_stop,
.pick_next_task = pick_next_task_stop,
.put_prev_task = put_prev_task_stop,
const struct sched_class dl_sched_class = {
.next = &rt_sched_class,
.enqueue_task = enqueue_task_dl,
.dequeue_task = dequeue_task_dl,
.yield_task = yield_task_dl,
.check_preempt_curr = check_preempt_curr_dl,
.pick_next_task = pick_next_task_dl,
.put_prev_task = put_prev_task_dl,
//作用于实时进程
const struct sched_class rt_sched_class = {
.next = &fair_sched_class,
.enqueue_task = enqueue_task_rt,
.dequeue_task = dequeue_task_rt,
.yield_task = yield_task_rt,
.check_preempt_curr = check_preempt_curr_rt,
.pick_next_task = pick_next_task_rt,
.put_prev_task = put_prev_task_rt,
//每个CPU的第一个线程(PID=0)即swapper,它是一个静态线程,调度类属于idle_sched_class,一般运行在开机过程中,以及CPU异常做dump的时候
const struct sched_class idle_sched_class = {
/* .next is NULL */
/* no enqueue/yield_task for idle tasks */
/* dequeue is not valid, we print a debug message there: */
.dequeue_task = dequeue_task_idle,
.check_preempt_curr = check_preempt_curr_idle,
.pick_next_task = pick_next_task_idle,
.put_prev_task = put_prev_task_idle,
// 源码目录:kernel/sched/fair.c
//公平调度器CFS,一般常用于线程
const struct sched_class fair_sched_class = {
.next = &idle_sched_class,
.enqueue_task = enqueue_task_fair,
.dequeue_task = dequeue_task_fair,
.yield_task = yield_task_fair,
.yield_to_task = yield_to_task_fair,
.check_preempt_curr = check_preempt_wakeup,
.pick_next_task = pick_next_task_fair,
.put_prev_task = put_prev_task_fair,
SCHED_NORMAL,SCHED_BATCH,SCHED_IDLE直接映射到fair_sched_class
SCHED_RR,SCHED_FIFO与rt_sched_class进行关联
linux调度核心选择下一个合适的task运行时,会按照优先级顺序遍历调度类的pick_next_task函数.
4.优先级与调度策略
// Source: /include/linux/sched/prio.h
// Priority values below MAX_RT_PRIO (0-99) belong to real-time tasks.
#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO MAX_USER_RT_PRIO
// Upper bound of the whole priority range; normal tasks occupy
// MAX_RT_PRIO .. MAX_PRIO-1.
// NOTE(review): NICE_WIDTH is defined outside this excerpt; presumably 40,
// which matches the 0-139 range stated in these notes — confirm.
#define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH)
// Default priority: the middle of the normal-task range.
#define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2)
进程分类:
- 实时进程(Real-Time Process):优先级高,需要立即执行的进程
- 普通进程(Normal Process):优先级低,更长执行时间的进程
进程的优先级是一个0-139的数字,数字越小,优先级越高,0-99为实时进程,100-139为普通进程
调度策略
unsigned int policy:保存进程的调度策略
SCHED_NORMAL:用于普通进程,通过CFS调度器来实现
SCHED_BATCH:相当于SCHED_NORMAL分化的版本,采用分时策略,根据动态优先级,分配CPU运行所需要的资源
SCHED_IDLE:优先级最低,在系统空闲时才执行这类进程
SCHED_RR:轮流调度算法(实时调度策略)
SCHED_DEADLINE:新支持的实时进程调度策略,针对突发性的计算
SCHED_BATCH用于非交互的处理器消耗型进程,SCHED_IDLE是在系统负载很低时使用,二者都使用CFS
/*
* Scheduling policies
*/
// Normal (non-real-time) tasks; implemented by the CFS scheduler.
#define SCHED_NORMAL 0
// Real-time policy; associated with rt_sched_class.
#define SCHED_FIFO 1
// Real-time round-robin policy; associated with rt_sched_class.
#define SCHED_RR 2
// Non-interactive, CPU-bound tasks; mapped to fair_sched_class.
#define SCHED_BATCH 3
/* SCHED_ISO: reserved but not implemented yet */
// Lowest priority; such tasks run only when the system is idle; mapped to fair_sched_class.
#define SCHED_IDLE 5
// Newer real-time policy aimed at bursty computations.
#define SCHED_DEADLINE 6
5.调度器结构分析
进程调度任务:合理分配CPU时间给运行的进程
调度器目标:有效地分配CPU时间片
主调度器:通过schedule()函数来完成进程的选择和切换
周期调度器:根据频率自动调用
scheduler_tick函数:根据进程运行时间触发调度
上下文切换:用于切换地址空间,切换寄存器,栈空间
源码目录:kernel/sched/sched.h
/* CFS-related fields in a runqueue */
//CFS调度运行队列,每个CPU的rq包含一个cfs_rq,每个组的sched_entity中也会有一个cfs_rq队列
struct cfs_rq {
//CFS运行队列中所有进程总负载
struct load_weight load;
//nr_running:cfs_rq中的调度实体数量,h_nr_running:只对进程有效
unsigned int nr_running, h_nr_running;
u64 exec_clock;
u64 min_vruntime;
#ifndef CONFIG_64BIT
u64 min_vruntime_copy;
#endif
//红黑树的root
struct rb_root tasks_timeline;
//下一个调度节点(红黑树最左边节点就是下一个调度的实体)
struct rb_node *rb_leftmost;
/*
* 'curr' points to currently running entity on this cfs_rq.
* It is set to NULL otherwise (i.e when none are currently running).
*/
struct sched_entity *curr, *next, *last, *skip;