03.调度器分析

1.调度器

内核中用来安排进程执行的模块称为调度器(scheduler),它可以切换进程状态.

调度器是CPU(中央处理器)的管理员,主要负责完成两件事情:

  • 选择某些就绪进程来执行
  • 打断某些执行的进程让它们变为就绪状态

调度器分配CPU时间的基本依据就是进程的优先级。上下文切换(context switch):将进程在CPU中切换执行的过程,内核承担此任务,负责重建和存储被切换掉之前的CPU状态

2.调度类分析

sched_class结构体表示调度类,定义在kernel/sched/sched.h

成员分析

enqueue_task:向就绪队列添加一个进程,当某个任务进入运行状态,该函数将会被调用,将调度的实体放入到红黑树中

dequeue_task:将一个进程从就绪队列中删除,当某个任务退出可运行状态时,调用该函数,从红黑树中移除对应的调度实体

yield_task:在进程想要自愿放弃对处理器的控制权的时候,可使用sched_yield系统调用,它会调用内核的API来处理该操作

check_preempt_curr:检查当前运行的任务是否可被新任务抢占

pick_next_task:选择要运行的最合适的进程

put_prev_task:用另一个进程代替当前运行的进程

set_curr_task:当任务修改它的调度类,或修改它的任务组时,将调用这个函数

task_tick:每次激活周期调度器时,由周期性调度器使用

/*
 * struct sched_class - table of hooks that one scheduling class provides.
 *
 * Each member is a function pointer the core scheduler calls for a given
 * event (enqueue, dequeue, pick next task, tick, ...).  The per-member notes
 * below are translated from the article's annotations; several of them
 * describe CFS behaviour (red-black tree) rather than every class.
 */
struct sched_class {
	// Several scheduling classes exist in the system; they are linked in a
	// list ordered by scheduling priority.
	const struct sched_class *next;

	// Add a task to the run queue, i.e. insert the scheduling entity (task)
	// into the red-black tree and increment nr_running.
	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);

	// Remove a task from the run queue and decrement nr_running.
	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);

	// Give up the CPU. Per the original note the scheduling entity ends up at
	// the rightmost end of the red-black tree.
	// NOTE(review): the original wording ("LIFO") is unclear — confirm
	// against the class implementation.
	void (*yield_task) (struct rq *rq);
	// NOTE(review): undocumented in the article; by its name it yields in
	// favour of task p — confirm.
	bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);

	// Check whether the current task can be preempted by a new task.
	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);

	/*
	 * It is the responsibility of the pick_next_task() method that will
	 * return the next task to call put_prev_task() on the @prev task or
	 * something equivalent.
	 *
	 * May return RETRY_TASK when it finds a higher prio class has runnable
	 * tasks.
	 */

	// Select the next task that should run.
	struct task_struct * (*pick_next_task) (struct rq *rq,
						struct task_struct *prev);

	// Put the task back onto the run queue.
	void (*put_prev_task) (struct rq *rq, struct task_struct *p);

#ifdef CONFIG_SMP

	// Choose a suitable CPU for the task.
	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);

	// Migrate the task to another CPU.
	void (*migrate_task_rq)(struct task_struct *p);

	// Hooks used when waking a task up.
	void (*task_waking) (struct task_struct *task);
	void (*task_woken) (struct rq *this_rq, struct task_struct *task);

	// Change the task's CPU affinity.
	void (*set_cpus_allowed)(struct task_struct *p,
				 const struct cpumask *newmask);

	// Bring the run queue online.
	void (*rq_online)(struct rq *rq);
	// Take the run queue offline.
	void (*rq_offline)(struct rq *rq);

#endif

	// Called when a task changes its scheduling class or its task group.
	void (*set_curr_task) (struct rq *rq);

	// Timer-tick hook; may lead to a task switch, i.e. it drives preemption
	// of the running task.
	void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);

	// Called when a task is created; initialisation differs between
	// scheduling policies.
	void (*task_fork) (struct task_struct *p);
	
	// Called when a task exits.
	void (*task_dead) (struct task_struct *p);

	/*
	 * The switched_from() call is allowed to drop rq->lock, therefore we
	 * cannot assume the switched_from/switched_to pair is serialized by
	 * rq->lock. They are however serialized by p->pi_lock.
	 */
	
	// Used for switch operations (the original note says "process switching").
	// NOTE(review): the names suggest these fire when a task leaves/enters
	// this scheduling class — confirm.
	void (*switched_from) (struct rq *this_rq, struct task_struct *task);
	void (*switched_to) (struct rq *this_rq, struct task_struct *task);

	// Called when the task's priority changes; oldprio is the previous value.
	void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
			     int oldprio);

	// NOTE(review): undocumented in the article; presumably returns the
	// round-robin interval for the task — confirm.
	unsigned int (*get_rr_interval) (struct rq *rq,
					 struct task_struct *task);

	// NOTE(review): undocumented in the article; presumably refreshes
	// accounting for the currently running task — confirm.
	void (*update_curr) (struct rq *rq);

#ifdef CONFIG_FAIR_GROUP_SCHED
	// Only present with fair group scheduling enabled.
	void (*task_move_group) (struct task_struct *p);
#endif
};

3.调度类

Linux调度类包括dl_sched_class、rt_sched_class、fair_sched_class及idle_sched_class等。每一个进程都对应一种调度策略,每一种调度策略又对应一种调度类,每一个调度类可以对应多种调度策略。

SCHED_FIFO调度策略的实时进程永远比SCHED_NORMAL调度策略的普通进程优先级高

调度类的优先级顺序:

stop_sched_class > dl_sched_class > rt_sched_class > fair_sched_class > idle_sched_class

// Declarations of the scheduling classes. The scheduling-policy constants
// themselves live in include/uapi/linux/sched.h.
extern const struct sched_class stop_sched_class;
extern const struct sched_class dl_sched_class;

// Real-time scheduler; policies: SCHED_FIFO, SCHED_RR
extern const struct sched_class rt_sched_class; 
// Completely fair scheduler (CFS); policies: SCHED_NORMAL, SCHED_BATCH, etc.
extern const struct sched_class fair_sched_class;

extern const struct sched_class idle_sched_class;



//优先级最高,会中断所有的其他进程,并且不会被其他的任务打断
const struct sched_class stop_sched_class = {
	.next			= &dl_sched_class,

	.enqueue_task		= enqueue_task_stop,
	.dequeue_task		= dequeue_task_stop,
	.yield_task		= yield_task_stop,

	.check_preempt_curr	= check_preempt_curr_stop,

	.pick_next_task		= pick_next_task_stop,
	.put_prev_task		= put_prev_task_stop,

    

const struct sched_class dl_sched_class = {
	.next			= &rt_sched_class,
	.enqueue_task		= enqueue_task_dl,
	.dequeue_task		= dequeue_task_dl,
	.yield_task		= yield_task_dl,

	.check_preempt_curr	= check_preempt_curr_dl,

	.pick_next_task		= pick_next_task_dl,
	.put_prev_task		= put_prev_task_dl,
 
//作用于实时进程
const struct sched_class rt_sched_class = {
	.next			= &fair_sched_class,
	.enqueue_task		= enqueue_task_rt,
	.dequeue_task		= dequeue_task_rt,
	.yield_task		= yield_task_rt,

	.check_preempt_curr	= check_preempt_curr_rt,

	.pick_next_task		= pick_next_task_rt,
	.put_prev_task		= put_prev_task_rt,
 

//每个CPU的第一个PID=0线程,swapper是一个静态线程,调度类属于idle_sched_class,一般运行在开机过程和CPU异常的时候会做dump
const struct sched_class idle_sched_class = {
	/* .next is NULL */
	/* no enqueue/yield_task for idle tasks */

	/* dequeue is not valid, we print a debug message there: */
	.dequeue_task		= dequeue_task_idle,

	.check_preempt_curr	= check_preempt_curr_idle,

	.pick_next_task		= pick_next_task_idle,
	.put_prev_task		= put_prev_task_idle,

// 源码目录:kernel/sched/fair.c
//公平调度器CFS,一般常用于线程
const struct sched_class fair_sched_class = {
	.next			= &idle_sched_class,
	.enqueue_task		= enqueue_task_fair,
	.dequeue_task		= dequeue_task_fair,
	.yield_task		= yield_task_fair,
	.yield_to_task		= yield_to_task_fair,

	.check_preempt_curr	= check_preempt_wakeup,

	.pick_next_task		= pick_next_task_fair,
	.put_prev_task		= put_prev_task_fair,

SCHED_NORMAL,SCHED_BATCH,SCHED_IDLE直接映射到fair_sched_class

SCHED_RR,SCHED_FIFO与rt_sched_class进行关联

linux调度核心选择下一个合适的task运行时,会按照优先级顺序遍历调度类的pick_next_task函数.

4.优先级与调度策略

// Source: include/linux/sched/prio.h
// Priority layout: values below MAX_RT_PRIO (0-99) are real-time priorities;
// values from MAX_RT_PRIO up to MAX_PRIO - 1 are normal-task priorities.
// A smaller number means a higher priority.
#define MAX_USER_RT_PRIO	100
#define MAX_RT_PRIO		MAX_USER_RT_PRIO

// NICE_WIDTH is defined elsewhere; presumably 40, matching the 0-139 range
// described in the surrounding text — confirm.
#define MAX_PRIO		(MAX_RT_PRIO + NICE_WIDTH)
#define DEFAULT_PRIO		(MAX_RT_PRIO + NICE_WIDTH / 2)

进程分类:

  • 实时进程(Real-Time Process):优先级高,需要立即执行的进程
  • 普通进程(Normal Process):优先级低,更长执行时间的进程

进程的优先级是一个0-139的数字,数字越小,优先级越高,0-99为实时进程,100-139为普通进程

调度策略

unsigned int policy:保存进程的调度策略

SCHED_NORMAL:用于普通进程,通过CFS调度器来实现

SCHED_BATCH:相当于SCHED_NORMAL分化的版本,采用分时策略,根据动态优先级,分配CPU运行所需要资源

SCHED_IDLE:优先级最低,在系统空闲时才执行这类进程

SCHED_FIFO:先进先出调度算法(实时调度策略)

SCHED_RR:轮流调度算法(实时调度策略)

SCHED_DEADLINE:新支持的实时进程调度策略,针对突发性的计算

SCHED_BATCH用于非交互的处理器消耗型进程,SCHED_IDLE在系统负载很低时使用,两者都通过CFS调度

/*
 * Scheduling policies
 *
 * Values stored in a task's `policy` field.  Per the surrounding text,
 * SCHED_NORMAL, SCHED_BATCH and SCHED_IDLE map to fair_sched_class, while
 * SCHED_FIFO and SCHED_RR map to rt_sched_class.
 */
// Normal tasks, handled by CFS.
#define SCHED_NORMAL		0
// Real-time policy.
#define SCHED_FIFO		1
// Real-time round-robin policy.
#define SCHED_RR		2
// Non-interactive, CPU-bound tasks.
#define SCHED_BATCH		3
/* SCHED_ISO: reserved but not implemented yet */
// Lowest priority; runs when the system is otherwise idle.
#define SCHED_IDLE		5
// Deadline-based real-time policy.
#define SCHED_DEADLINE		6

5.调度器结构分析

进程调度任务:合理分配CPU时间给运行的进程

调度器目标:有效的分配CPU时间片

03.调度器分析

主调度器:通过schedule()函数来完成进程的选择和切换

周期调度器:根据频率自动调用

scheduler_tick函数:根据进程运行时间触发调度

上下文切换:用于切换地址空间,切换寄存器,栈空间

源码目录:kernel/sched/sched.h

/* CFS-related fields in a runqueue */
//CFS调度运行队列,每个CPU的rq包含一个cfs_rq,每个组的sched_entity中也会有一个cfs_rq队列
struct cfs_rq {
	//CFS运行队列中所有进程总负载
	struct load_weight load;

	//nr_running:cfs_rq中的调度实体数量,h_nr_running:只对进程有效
	unsigned int nr_running, h_nr_running;

	u64 exec_clock;
	u64 min_vruntime;
#ifndef CONFIG_64BIT
	u64 min_vruntime_copy;
#endif
	//红黑树的root
	struct rb_root tasks_timeline;
	
	//下一个调度节点(红黑树最左边节点就是下一个调度的实体)
	struct rb_node *rb_leftmost;

	/*
	 * 'curr' points to currently running entity on this cfs_rq.
	 * It is set to NULL otherwise (i.e when none are currently running).
	 */
	struct sched_entity *curr, *next, *last, *skip;
上一篇:MachineLearning入门---第4章---TensorFlow示例(cifar图⽚数据建模流程示例)


下一篇:online_jf.lua --累计在线时间领取物品(积分)的lua脚本