水源源码解析之进度调节机制(基于3.16-rc4卡塔尔

《奔跑吧linux内核》3.2笔记,美中不足还望我们商量指正

进度调治所接收到的数据结构:

提出阅读博文理解linux cfs调度器

1.就绪队列

  进度大概能够分为人机联作式进度批管理进度实时过程。对于分化的进度接纳分化的调节计谋,目前Linux内核中私下认可达成了4种调节战略,分别是deadline、realtime、CFS和idle,分别适用struct sched_class来定义调节类。

基本为每一个cpu成立三个经过就绪队列,该队列上的进程均由该cpu实践,代码如下(kernel/sched/core.c卡塔尔国。

  4种调节类经过next指针串联在一块儿,顾客空间程序能够运用调节战术API函数(sched_setscheduler()卡塔 尔(阿拉伯语:قطر‎来设定客商进度的调治战术。

1 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);

标题后生可畏:请简述对经过调治器的知晓,早起Linux内核调整器(富含O(N)和O(1)卡塔 尔(英语:State of Qatar)调整器是何等做事的?

概念了叁个struct rq结构体数组,每一个数组成分是一个就绪队列,对应贰个cpu。下边看下struct rq结构体(kernel/sched/sched.h卡塔尔:

  调整器是按自然的办法对进程展开调治的后生可畏种机制,供给为顺序普通进程尽恐怕公平地分享CPU时间。

图片 1图片 2

  O(N)调整器宣布与一九九四年,从就绪队列中相比较全部进度的优先级,然后选取二个参天优先级的进程作为下多个调整进度。每二个经过有多个定位时间片,当进度时间片使用完事后,调节器会接纳下一个调迈过程,当全数进程都运作叁次后再重新分配时间片。那些调节器选用下三个调整进度前需求遍历整个就绪队列,花费O(N)时间。

  1 struct rq {
  2     /* runqueue lock: */
  3     raw_spinlock_t lock;
  4 
  5     /*
  6      * nr_running and cpu_load should be in the same cacheline because
  7      * remote CPUs use both these fields when doing load calculation.
  8      */
  9     unsigned int nr_running;
 10 #ifdef CONFIG_NUMA_BALANCING
 11     unsigned int nr_numa_running;
 12     unsigned int nr_preferred_running;
 13 #endif
 14     #define CPU_LOAD_IDX_MAX 5
 15     unsigned long cpu_load[CPU_LOAD_IDX_MAX];
 16     unsigned long last_load_update_tick;
 17 #ifdef CONFIG_NO_HZ_COMMON
 18     u64 nohz_stamp;
 19     unsigned long nohz_flags;
 20 #endif
 21 #ifdef CONFIG_NO_HZ_FULL
 22     unsigned long last_sched_tick;
 23 #endif
 24     int skip_clock_update;
 25 
 26     /* capture load from *all* tasks on this cpu: */
 27     struct load_weight load;
 28     unsigned long nr_load_updates;
 29     u64 nr_switches;
 30 
 31     struct cfs_rq cfs;
 32     struct rt_rq rt;
 33     struct dl_rq dl;
 34 
 35 #ifdef CONFIG_FAIR_GROUP_SCHED
 36     /* list of leaf cfs_rq on this cpu: */
 37     struct list_head leaf_cfs_rq_list;
 38 
 39     struct sched_avg avg;
 40 #endif /* CONFIG_FAIR_GROUP_SCHED */
 41 
 42     /*
 43      * This is part of a global counter where only the total sum
 44      * over all CPUs matters. A task can increase this counter on
 45      * one CPU and if it got migrated afterwards it may decrease
 46      * it on another CPU. Always updated under the runqueue lock:
 47      */
 48     unsigned long nr_uninterruptible;
 49 
 50     struct task_struct *curr, *idle, *stop;
 51     unsigned long next_balance;
 52     struct mm_struct *prev_mm;
 53 
 54     u64 clock;
 55     u64 clock_task;
 56 
 57     atomic_t nr_iowait;
 58 
 59 #ifdef CONFIG_SMP
 60     struct root_domain *rd;
 61     struct sched_domain *sd;
 62 
 63     unsigned long cpu_capacity;
 64 
 65     unsigned char idle_balance;
 66     /* For active balancing */
 67     int post_schedule;
 68     int active_balance;
 69     int push_cpu;
 70     struct cpu_stop_work active_balance_work;
 71     /* cpu of this runqueue: */
 72     int cpu;
 73     int online;
 74 
 75     struct list_head cfs_tasks;
 76 
 77     u64 rt_avg;
 78     u64 age_stamp;
 79     u64 idle_stamp;
 80     u64 avg_idle;
 81 
 82     /* This is used to determine avg_idle's max value */
 83     u64 max_idle_balance_cost;
 84 #endif
 85 
 86 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 87     u64 prev_irq_time;
 88 #endif
 89 #ifdef CONFIG_PARAVIRT
 90     u64 prev_steal_time;
 91 #endif
 92 #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
 93     u64 prev_steal_time_rq;
 94 #endif
 95 
 96     /* calc_load related fields */
 97     unsigned long calc_load_update;
 98     long calc_load_active;
 99 
100 #ifdef CONFIG_SCHED_HRTICK
101 #ifdef CONFIG_SMP
102     int hrtick_csd_pending;
103     struct call_single_data hrtick_csd;
104 #endif
105     struct hrtimer hrtick_timer;
106 #endif
107 
108 #ifdef CONFIG_SCHEDSTATS
109     /* latency stats */
110     struct sched_info rq_sched_info;
111     unsigned long long rq_cpu_time;
112     /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
113 
114     /* sys_sched_yield() stats */
115     unsigned int yld_count;
116 
117     /* schedule() stats */
118     unsigned int sched_count;
119     unsigned int sched_goidle;
120 
121     /* try_to_wake_up() stats */
122     unsigned int ttwu_count;
123     unsigned int ttwu_local;
124 #endif
125 
126 #ifdef CONFIG_SMP
127     struct llist_head wake_list;
128 #endif
129 };

  O(1)调解器用于Linux2.6.23水源早先,它为各类CPU维护风度翩翩组经过优先级队列,各样优先级八个行列,那样在筛选下八个进度时,只要查询优先级队列相应的位图即可以见到道哪些队列中有就绪进度,查询时间常数为O(1)。

struct rq

标题二:请简述进程优先级、nice和权重之间的涉及。

该结构体是本地cpu全部进程组成的服服帖帖队列,在linux内核中,进度被分成普通进程和实时进度,那三种进度的调治计谋是不相同的,由此在31-32行能够看出rq结构体中又内嵌了struct cfs_rq cfs和struct rt_rq rt八个子就绪队列,分别来公司普通进度和实时进程(普通进度将应用完全公平级调动度战略cfs,而实时进度将选择实时调节战略卡塔尔,第33行struct dl_rq dl调解空闲进程,这段时间不研讨。所以,假设我们研讨的是平常进度的调治,供给关注的正是struct cfs_rq cfs队列;假设探讨的是实时进度,就只关注struct rt_rq rt队列。

  内核使用0~139的数值表示经过的优先级,数值越低优先级越高。优先级0~99给实时进程使用,100~139给普通进度使用。在顾客空间由二个理念的变量nice值映射到平凡进度的优先级(100~139)。

1.1家常进度的就绪队列struct cfs_rq(kernel/sched/sched.h)

  nice值从-20~19,进度暗中同意的nice值为0。那足以清楚为叁14个阶段,nice值越高,则先行级越低,反之亦然。(nice每变化1,则附和的进度获得CPU的年华就改成一成卡塔 尔(阿拉伯语:قطر‎。

图片 3图片 4

  权重新闻即为调解实体的权重,为了总结方便,内核约定nice值为0的权重值为1024,别的的nice值对应相应的权重值能够透过查表的办法来得到,表即为prio_to_weight。

 1 /* CFS-related fields in a runqueue */
 2 struct cfs_rq {
 3     struct load_weight load;
 4     unsigned int nr_running, h_nr_running;
 5 
 6     u64 exec_clock;
 7     u64 min_vruntime;
 8 #ifndef CONFIG_64BIT
 9     u64 min_vruntime_copy;
10 #endif
11 
12     struct rb_root tasks_timeline;
13     struct rb_node *rb_leftmost;
14 
15     /*
16      * 'curr' points to currently running entity on this cfs_rq.
17      * It is set to NULL otherwise (i.e when none are currently running).
18      */
19     struct sched_entity *curr, *next, *last, *skip;
20 
21 #ifdef    CONFIG_SCHED_DEBUG
22     unsigned int nr_spread_over;
23 #endif
24 
25 #ifdef CONFIG_SMP
26     /*
27      * CFS Load tracking
28      * Under CFS, load is tracked on a per-entity basis and aggregated up.
29      * This allows for the description of both thread and group usage (in
30      * the FAIR_GROUP_SCHED case).
31      */
32     unsigned long runnable_load_avg, blocked_load_avg;
33     atomic64_t decay_counter;
34     u64 last_decay;
35     atomic_long_t removed_load;
36 
37 #ifdef CONFIG_FAIR_GROUP_SCHED
38     /* Required to track per-cpu representation of a task_group */
39     u32 tg_runnable_contrib;
40     unsigned long tg_load_contrib;
41 
42     /*
43      *   h_load = weight * f(tg)
44      *
45      * Where f(tg) is the recursive weight fraction assigned to
46      * this group.
47      */
48     unsigned long h_load;
49     u64 last_h_load_update;
50     struct sched_entity *h_load_next;
51 #endif /* CONFIG_FAIR_GROUP_SCHED */
52 #endif /* CONFIG_SMP */
53 
54 #ifdef CONFIG_FAIR_GROUP_SCHED
55     struct rq *rq;    /* cpu runqueue to which this cfs_rq is attached */
56 
57     /*
58      * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
59      * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
60      * (like users, containers etc.)
61      *
62      * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
63      * list is used during load balance.
64      */
65     int on_list;
66     struct list_head leaf_cfs_rq_list;
67     struct task_group *tg;    /* group that "owns" this runqueue */
68 
69 #ifdef CONFIG_CFS_BANDWIDTH
70     int runtime_enabled;
71     u64 runtime_expires;
72     s64 runtime_remaining;
73 
74     u64 throttled_clock, throttled_clock_task;
75     u64 throttled_clock_task_time;
76     int throttled, throttle_count;
77     struct list_head throttled_list;
78 #endif /* CONFIG_CFS_BANDWIDTH */
79 #endif /* CONFIG_FAIR_GROUP_SCHED */
80 };

主题材料三:请简述CFS调解器是如何是好事的。

struct cfs_rq

  CFS调整器丢掉在此以前固定期间片和一贯调节周期的算法,采纳进程权重值的比例来量化和测算实际运作时刻。引进设想机械钟的定义,各个进程的诬捏时间是事实上运作时刻相对nice值为0的权重的比例值。进度依照各自不一样的速率比在情理挂钟节拍内发展。nice值小的经过,优先级高且权重大,其虚构机械钟比真正石英钟跑得慢,不过足以博得超级多的运营时刻;反之,nice值大的长河,优先级低,权重也低,其伪造石英钟比实际机械钟跑得快,得到相当少的运作时刻。CFS调解器总是挑精拣肥虚构石英钟跑得慢的经过,形似叁个二种自动变速箱,nice值为0的历程是条件齿轮,其余各种进度在区别变速比下相互追赶,从而达到公正正义。

cfs_rq就绪队列是以红黑树的款式来协会调整实体。第12行tasks_timeline成员就是红黑树的树根。第13行rb_leftmost指向了红黑树最左边包车型地铁左孩子(下一个可调治的实业卡塔 尔(英语:State of Qatar)。第19行curr指向当下正运营的实体,next指向将被提示的长河,last指向唤醒next进度的进度,next和last用法后面会提到。第55行rq指向了该cfs_rq就绪队列所属的rq队列。

难点四:CFS调节器中的vruntime是何等计算的?

1.2实时进程的就绪队列struct rt_rq(kernel/sched/sched.h)

  vruntime=(delta_exec*nice_0_weight)/weight。其中,delta_exec为实在运转时刻,nice_0_weight为nice为0的权重值,weight代表该进程的权重值。在update_curr()函数中,实现了该值的计量,当时,为了计算高效,将统计办法成为了乘法和平运动动vruntime=(delta_exec*nice_0_weight*inv_weight)>>shift,其中inv_weight=2^32/weight,是优先计算好寄存在prio_to_wmult中的。

图片 5图片 6

标题五:vruntime是哪天更新的?

 1 /* Real-Time classes' related field in a runqueue: */
 2 struct rt_rq {
 3     struct rt_prio_array active;
 4     unsigned int rt_nr_running;
 5 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 6     struct {
 7         int curr; /* highest queued rt task prio */
 8 #ifdef CONFIG_SMP
 9         int next; /* next highest */
10 #endif
11     } highest_prio;
12 #endif
13 #ifdef CONFIG_SMP
14     unsigned long rt_nr_migratory;
15     unsigned long rt_nr_total;
16     int overloaded;
17     struct plist_head pushable_tasks;
18 #endif
19     int rt_queued;
20 
21     int rt_throttled;
22     u64 rt_time;
23     u64 rt_runtime;
24     /* Nests inside the rq lock: */
25     raw_spinlock_t rt_runtime_lock;
26 
27 #ifdef CONFIG_RT_GROUP_SCHED
28     unsigned long rt_nr_boosted;
29 
30     struct rq *rq;
31     struct task_group *tg;
32 #endif
33 };

  创立新进度,插足就绪队列,调治tick等都会更新当前vruntime值。

struct rt_rq

难点六:CFS调治器中的min_vruntime有如何功能?

2.调整实体(include/linux/sched.h卡塔 尔(阿拉伯语:قطر‎

  min_vruntime在CFS就绪队列数据结构中,单步依次增加,用于追踪该就绪队列红黑树中幽微的vruntime。

2.1习以为常进程的调整实体sched_entity

标题七:CFS调治器对新创制的经过和刚唤醒的历程有什么照应?

 1 struct sched_entity {
 2     struct load_weight    load;        /* for load-balancing */
 3     struct rb_node        run_node;
 4     struct list_head    group_node;
 5     unsigned int        on_rq;
 6 
 7     u64            exec_start;
 8     u64            sum_exec_runtime;
 9     u64            vruntime;
10     u64            prev_sum_exec_runtime;
11 
12     u64            nr_migrations;
13 
14 #ifdef CONFIG_SCHEDSTATS
15     struct sched_statistics statistics;
16 #endif
17 
18 #ifdef CONFIG_FAIR_GROUP_SCHED
19     int            depth;
20     struct sched_entity    *parent;
21     /* rq on which this entity is (to be) queued: */
22     struct cfs_rq        *cfs_rq;
23     /* rq "owned" by this entity/group: */
24     struct cfs_rq        *my_q;
25 #endif
26 
27 #ifdef CONFIG_SMP
28     /* Per-entity load-tracking */
29     struct sched_avg    avg;
30 #endif
31 };

本文由2020欧洲杯官方投注-2020欧洲杯官方投注网址发布于win7,转载请注明出处:水源源码解析之进度调节机制(基于3.16-rc4卡塔尔

相关阅读