14.3 balance_pgdat函数

    balance_pgdat()函数是回收页面的主函数。这个函数比较长,首先看一个框架,主体函数是一个很长的while循环。

代码如下:

/*
 * For kswapd, balance_pgdat() will work across all this node's zones until
 * they are all at high_wmark_pages(zone).
 *
 * Returns the final order kswapd was reclaiming at, and writes end_zone (the
 * index of the highest zone selected for scanning) back through
 * *classzone_idx.
 *
 * There is special handling here for zones which are full of pinned pages.
 * This can happen if the pages are all mlocked, or if they are all used by
 * device drivers (say, ZONE_DMA).  Or if they are all in use by hugetlb.
 * What we do is to detect the case where all pages in the zone have been
 * scanned twice and there has been zero successful reclaim.  Mark the zone as
 * dead and from now on, only perform a short scan.  Basically we're polling
 * the zone for when the problem goes away.
 *
 * kswapd scans the zones in the highmem->normal->dma direction.  It skips
 * zones which have free_pages > high_wmark_pages(zone), but once a zone is
 * found to have free_pages <= high_wmark_pages(zone), we scan that zone and the
 * lower zones regardless of the number of free pages in the lower zones. This
 * interoperates with the page allocator fallback scheme to ensure that aging
 * of pages is balanced across the zones.
 */
static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                            int *classzone_idx)
{
    int i;
    int end_zone = 0;   /* Inclusive.  0 = ZONE_DMA */
    unsigned long nr_soft_reclaimed;
    unsigned long nr_soft_scanned;

    /*
     * struct scan_control carries the parameters that steer this reclaim
     * pass: the GFP mask, the allocation order (2^order pages), the scan
     * priority and the writepage/unmap/swap permissions.
     * priority determines how much is scanned per pass
     * (total_size >> priority): it starts at DEF_PRIORITY and is
     * decremented at the bottom of the loop, so a lower value means a
     * larger portion of the LRU lists is scanned.
     */
    struct scan_control sc = {
        .gfp_mask = GFP_KERNEL,
        .order = order,
        .priority = DEF_PRIORITY,
        .may_writepage = !laptop_mode,
        .may_unmap = 1,
        .may_swap = 1,
    };
    count_vm_event(PAGEOUTRUN);
    /*
     * The do/while loop below is the core of background reclaim. It can be
     * read in three parts, marked (1), (2) and (3).
     */
    do {
        unsigned long nr_attempted = 0;
        bool raise_priority = true;
        bool pgdat_needs_compaction = (order > 0);

        sc.nr_reclaimed = 0;

        /*
         * Scan in the highmem->dma direction for the highest
         * zone which needs scanning
         */
        /*
         * (1) Walk from the highest zone index down to 0 and stop at the
         * first zone that is not balanced, i.e. for which zone_balanced()
         * (a check against the high watermark) fails. Its index is
         * remembered in end_zone.
         */
        for (i = pgdat->nr_zones - 1; i >= 0; i--) {
            struct zone *zone = pgdat->node_zones + i;

            if (!populated_zone(zone))
                continue;

            if (sc.priority != DEF_PRIORITY &&
                !zone_reclaimable(zone))
                continue;

            /*
             * Do some background aging of the anon list, to give
             * pages a chance to be referenced before reclaiming.
             */
            age_active_anon(zone, &sc);

            /*
             * If the number of buffer_heads in the machine
             * exceeds the maximum allowed level and this node
             * has a highmem zone, force kswapd to reclaim from
             * it to relieve lowmem pressure.
             */
            if (buffer_heads_over_limit && is_highmem_idx(i)) {
                end_zone = i;
                break;
            }

            /* Is this zone's free memory above its high watermark? */
            if (!zone_balanced(zone, order, 0, 0)) {
                end_zone = i;
                break;
            } else {
                /*
                 * If balanced, clear the dirty and congested
                 * flags
                 */
                clear_bit(ZONE_CONGESTED, &zone->flags);
                clear_bit(ZONE_DIRTY, &zone->flags);
            }
        }

        /* The loop ran off the bottom: no zone needs scanning. */
        if (i < 0)
            goto out;
        /*
         * (2) Work from the lowest zone (index 0) up to end_zone.
         * This first pass only decides whether compaction is still
         * needed: for order > 0, as soon as one populated zone passes the
         * low watermark check at this order, compaction is not required.
         */
        for (i = 0; i <= end_zone; i++) {
            struct zone *zone = pgdat->node_zones + i;

            /* Skip zones that populated_zone() reports as empty. */
            if (!populated_zone(zone))
                continue;

            /*
             * If any zone is currently balanced then kswapd will
             * not call compaction as it is expected that the
             * necessary pages are already available.
             */
            /*
             * Decide whether memory compaction is needed: when order > 0
             * and this zone is above its low watermark, compaction is
             * not needed.
             */
            if (pgdat_needs_compaction &&
                    zone_watermark_ok(zone, order,
                        low_wmark_pages(zone),
                        *classzone_idx, 0))
                pgdat_needs_compaction = false;
        }

        /*
         * If we're getting trouble reclaiming, start doing writepage
         * even in laptop mode.
         */
        if (sc.priority < DEF_PRIORITY - 2)
            sc.may_writepage = 1;

        /*
         * Now scan the zone in the dma->highmem direction, stopping
         * at the last zone which needs scanning.
         *
         * We do this because the page allocator works in the opposite
         * direction.  This prevents the page allocator from allocating
         * pages behind kswapd's direction of progress, which would
         * cause too much scanning of the lower zones.
         */
        /*
         * This loop again runs from zone index 0 up to end_zone, which is
         * the opposite of the page allocator's highest-zone-first order.
         * NOTE(review): the original annotation also claims this reduces
         * zone->lru_lock contention between direct reclaim and kswapd;
         * that is not demonstrated by the code shown here.
         */
        for (i = 0; i <= end_zone; i++) {
            struct zone *zone = pgdat->node_zones + i;

            if (!populated_zone(zone))
                continue;

            if (sc.priority != DEF_PRIORITY &&
                !zone_reclaimable(zone))
                continue;

            sc.nr_scanned = 0;

            nr_soft_scanned = 0;
            /*
             * Call soft limit reclaim before calling shrink_zone.
             */
            nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
                            order, sc.gfp_mask,
                            &nr_soft_scanned);
            sc.nr_reclaimed += nr_soft_reclaimed;

            /*
             * There should be no need to raise the scanning
             * priority if enough pages are already being scanned
             * that that high watermark would be met at 100%
             * efficiency.
             */
            /*
             * kswapd_shrink_zone() performs the actual scan and reclaim;
             * its parameters and results live in sc. It returns true when
             * the zone was already balanced or when at least
             * sc.nr_to_reclaim pages were scanned, in which case the scan
             * priority does not need to be raised.
             */
            if (kswapd_shrink_zone(zone, end_zone,
                           &sc, &nr_attempted))
                raise_priority = false;
        }

        /*
         * If the low watermark is met there is no need for processes
         * to be throttled on pfmemalloc_wait as they should now be
         * able to safely make forward progress. Wake them
         */
        /*
         * Tasks waiting on pgdat->pfmemalloc_wait are woken here once
         * pfmemalloc_watermark_ok() passes after this round of reclaim.
         * If it does not pass they stay asleep and the loop keeps
         * reclaiming.
         * NOTE(review): the original annotation states that kswapd wakes
         * all remaining waiters before it goes to sleep; that happens
         * outside this function - confirm against the caller.
         */
        if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
                pfmemalloc_watermark_ok(pgdat))
            wake_up_all(&pgdat->pfmemalloc_wait);

        /*
         * Fragmentation may mean that the system cannot be rebalanced
         * for high-order allocations in all zones. If twice the
         * allocation size has been reclaimed and the zones are still
         * not balanced then recheck the watermarks at order-0 to
         * prevent kswapd reclaiming excessively. Assume that a
         * process requested a high-order can direct reclaim/compact.
         */
        /*
         * sc.nr_reclaimed is the number of pages reclaimed in this pass.
         * Once it reaches 2 << order (twice the allocation size) the
         * order is dropped to 0 so kswapd does not reclaim too
         * aggressively. Without this, a fragmented node that keeps
         * failing pgdat_balanced() at the high order would loop until
         * sc.priority fell below 1. Leaving the loop still requires
         * pgdat_balanced() to succeed (or the priority to run out).
         */
        if (order && sc.nr_reclaimed >= 2UL << order)
            order = sc.order = 0;

        /* Check if kswapd should be suspending */
        /* Stop if kswapd was frozen or has been asked to stop. */
        if (try_to_freeze() || kthread_should_stop())
            break;

        /*
         * Compact if necessary and kswapd is reclaiming at least the
         * high watermark number of pages as requsted
         */
        /* Compact this node to reduce fragmentation if still needed. */
        if (pgdat_needs_compaction && sc.nr_reclaimed > nr_attempted)
            compact_pgdat(pgdat, order);

        /*
         * Raise priority if scanning rate is too low or there was no
         * progress in reclaiming pages
         */
        /*
         * raise_priority starts out true and is cleared only when
         * kswapd_shrink_zone() returned true for some zone. The priority
         * is also raised (its value decremented, so more is scanned) when
         * nothing at all was reclaimed in this pass.
         */
        if (raise_priority || !sc.nr_reclaimed)
            sc.priority--;

    /*
     * (3) Repeat with progressively more aggressive scanning until the
     * zones from index 0 up to *classzone_idx are balanced or the
     * priority drops below 1.
     */
    } while (sc.priority >= 1 &&
         !pgdat_balanced(pgdat, order, *classzone_idx));
    /*
     * NOTE(review): per the original annotation, *classzone_idx is the
     * index of the first zone suitable for the allocation, computed on the
     * page allocation path and handed down via wake_all_kswapds() - confirm
     * against the caller.
     */

out:
    /*
     * Return the order we were reclaiming at so prepare_kswapd_sleep()
     * makes a decision on the order we were last reclaiming at. However,
     * if another caller entered the allocator slow path while kswapd
     * was awake, order will remain at the higher level
     */
    *classzone_idx = end_zone;
    return order;
}

此函数看完之后我们需要查看kswapd_shrink_zone()函数,在后面。

struct scan_control定义如下:

[mm/vmscan.c]

/*
 * Parameters and running counters for one page-reclaim invocation. Callers
 * fill in the policy fields; the reclaim code updates nr_scanned and
 * nr_reclaimed as it goes.
 */
struct scan_control {
    /* How many pages shrink_list() should reclaim */
    unsigned long nr_to_reclaim;
    /* This context's GFP mask */
    gfp_t gfp_mask;
    /* Allocation order (of the request that triggered reclaim) */
    int order;
    /*
     * Nodemask of nodes allowed by the caller. If NULL, all nodes
     * are scanned.
     */
    nodemask_t  *nodemask;
    /*
     * The memory cgroup that hit its limit and as a result is the
     * primary target of this reclaim invocation.
     */
    struct mem_cgroup *target_mem_cgroup;
    /* Scan (total_size >> priority) pages at once */
    /*
     * Scan priority: one pass scans (total_size >> priority) pages, so a
     * smaller numeric value scans more pages per pass and a larger value
     * scans fewer. Callers start at DEF_PRIORITY.
     */
    int priority;
    /*
     * Whether reclaim may write pages back.
     * NOTE(review): the original annotation relates this to __GFP_IO and
     * __GFP_FS - confirm where it is consumed.
     */
    unsigned int may_writepage:1;
    /* Can mapped pages be reclaimed? */
    unsigned int may_unmap:1;
    /* Can pages be swapped as part of reclaim? */
    /*
     * NOTE(review): per the original annotation, when clear the anonymous
     * LRU lists are not scanned - confirm.
     */
    unsigned int may_swap:1;
    /* Can cgroups be reclaimed below their normal consumption range? */
    unsigned int may_thrash:1;
    unsigned int hibernation_mode:1;
    /* One of the zones is ready for compaction */
    unsigned int compaction_ready:1;
    /* Incremented by the number of inactive pages that were scanned */
    unsigned long nr_scanned;
    /* Number of pages freed so far during a call to shrink_zones() */
    unsigned long nr_reclaimed;
};

回到balance_pgdat()函数

pgdat_balanced()函数实现:判断一个内存节点上的物理页面是否处于平衡状态,返回true,则表示该内存节点处于平衡状态。

什么是平衡状态?

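对于order为0的情况,只有当从最低端zone到classzone_idx的所有zone都处于平衡状态时,才认为该内存节点是平衡的;对于order大于0的情况,只要处于平衡状态的zone所管理的页面数不少于这些zone管理页面总数的25%,就认为该内存节点是平衡的。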

[kswapd()->balance_pgdat()->pgdat_balanced()]

/*
 * pgdat_balanced() is used when checking if a node is balanced.
 *
 * Only zones 0..classzone_idx are considered; classzone_idx is supplied by
 * the caller.
 *
 * For order-0, all of those zones must be balanced: the first populated,
 * reclaimable zone that fails zone_balanced() makes the node unbalanced.
 *
 * For high-order allocations, the managed pages of every zone that counts
 * as balanced are summed into a "balanced" total. The node is considered
 * balanced when that total is at least 25% of the managed pages of all
 * populated zones in range. Forcing all zones to be balanced for high
 * orders can cause excessive reclaim when there are imbalanced zones.
 * The choice of 25% is due to
 *   o a 16M DMA zone that is balanced will not balance a zone on any
 *     reasonable sized machine
 *   o On all other machines, the top zone must be at least a reasonable
 *     percentage of the middle zones. For example, on 32-bit x86, highmem
 *     would need to be at least 256M for it to be balance a whole node.
 *     Similarly, on x86-64 the Normal zone would need to be at least 1G
 *     to balance a node on its own. These seemed like reasonable ratios.
 *
 * Returns true if the node is balanced, false otherwise.
 */
static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
{
    unsigned long total_managed = 0;
    unsigned long total_balanced = 0;
    int idx;

    /* Check the watermark levels of each zone up to classzone_idx. */
    for (idx = 0; idx <= classzone_idx; idx++) {
        struct zone *zone = &pgdat->node_zones[idx];

        if (!populated_zone(zone))
            continue;

        total_managed += zone->managed_pages;

        /*
         * A zone counts as balanced in two cases:
         *  - it is not reclaimable: balance_pgdat() skips such zones
         *    after DEF_PRIORITY, effectively treating them as balanced,
         *    so they must be treated the same way here;
         *  - zone_balanced() says its free pages are above the high
         *    watermark.
         * zone_balanced() is only consulted for reclaimable zones.
         */
        if (!zone_reclaimable(zone) ||
            zone_balanced(zone, order, 0, idx)) {
            total_balanced += zone->managed_pages;
            continue;
        }

        /* An unbalanced zone is fatal for order-0. */
        if (!order)
            return false;
    }

    /* Order-0 reaches this point only if no zone failed the check. */
    if (!order)
        return true;

    /* High order: at least a quarter of the managed pages must be balanced. */
    return total_balanced >= (total_managed >> 2);
}
回到balance_pgdat()函数
zone_balanced()函数实现:zone是否balanced由此函数来判断,这是针对于order来说的。此函数有两个条件:

(1) zone内的空闲内存高于高水位

    水位是在内存初始化的时候根据每个zone的内存大小自动计算出来的,每个zone可能有不同的水位。具体计算水位的算法可能各个kernel版本不尽相同,比如某个版本是这么计算的:对于非高端内存来说(64位机器上已经不存在高端内存了),min_watermark根据各个zone的内存占比,瓜分1024个page;low_watermark在此基础上增加25%;high_watermark在此基础上增加50%。(可以通过/proc/zoneinfo看到系统中每一个zone及其free_pages和watermark的情况。)这里的高水位对于现在的大内存机器来说,其实只是九牛一毛。由这个高水位来作为判断zone_balanced的基础,可见内核在内存balance的问题上还是很注重系统性能的。

(2) 要求zone内的内存在0到给定order之间平衡分布

    例如:总的空闲内存超过高水位、order-1及以上的空闲内存超过高水位的1/2、order-2及以上的空闲内存超过高水位的1/4、......、一直到所要求的order。

    为什么针对order的内存balanced不仅仅关心order阶的内存,而是关心0到order阶的所有内存呢?因为高order的连续内存是稀缺资源。如果内存分布不平衡,低order的内存请求可能因为低order内存暂时缺货而不得不将高order所对应的连续内存进行拆分。这种浪费是要尽量避免的。并且这样的拆分可能导致高order内存耗尽,从而满足不了对指定order的内存分配需求。

    那么为什么针对order的内存balanced又仅仅关心0到order阶的所有内存、而不关心大于order阶的内存呢?当我们需要检查针对于order的zone_balanced时,其实是说明我们需要这个zone内2^order的连续页面,由于连续页面回收不易,也不是系统内最普遍的需求(给用户空间使用的内存基本上都是order-0的,不考虑hugepage这样的特殊情况),所以更高的order就不用考虑了。后面会看到,kswapd只针对order-0进行回收。
/*
 * Decide whether a zone is balanced for an allocation of the given order.
 *
 * The zone is balanced when both of the following hold:
 *  - zone_watermark_ok_safe() passes against the high watermark plus
 *    balance_gap;
 *  - for order > 0 with CONFIG_COMPACTION enabled, compaction_suitable()
 *    does not return COMPACT_SKIPPED.
 *
 * The compaction check is only evaluated after the watermark check passed.
 */
static bool zone_balanced(struct zone *zone, int order,
              unsigned long balance_gap, int classzone_idx)
{
    unsigned long mark = high_wmark_pages(zone) + balance_gap;

    if (!zone_watermark_ok_safe(zone, order, mark, classzone_idx, 0))
        return false;

    /* Order-0, or kernels without compaction: the watermark alone decides. */
    if (!IS_ENABLED(CONFIG_COMPACTION) || !order)
        return true;

    return compaction_suitable(zone, order, 0, classzone_idx) !=
            COMPACT_SKIPPED;
}

/*
 * Watermark check that re-reads the free page count when the first reading
 * is low.
 *
 * NR_FREE_PAGES is read with zone_page_state(). If the zone has a non-zero
 * percpu_drift_mark and the reading is below it, the count is read again
 * with zone_page_state_snapshot() before being handed to
 * __zone_watermark_ok().
 * NOTE(review): presumably the snapshot is a more accurate count that folds
 * in pending per-cpu deltas - confirm against zone_page_state_snapshot().
 *
 * Returns the result of __zone_watermark_ok() for the chosen count.
 */
bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
            unsigned long mark, int classzone_idx, int alloc_flags)
{
    long nr_free = zone_page_state(z, NR_FREE_PAGES);
    bool below_drift_mark = z->percpu_drift_mark &&
                nr_free < z->percpu_drift_mark;

    if (below_drift_mark)
        nr_free = zone_page_state_snapshot(z, NR_FREE_PAGES);

    return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
                   nr_free);
}

回到pgdat_balanced()函数

kswapd_shrink_zone()函数实现:页面回收的真正函数

[kswapd()->balance_pgdat()->kswapd_shrink_zone()]


/*
 * kswapd shrinks the zone by the number of pages required to reach
 * the high watermark.
 *
 * @zone:          zone to reclaim from
 * @classzone_idx: zone index used for the watermark/compaction checks
 * @sc:            scan control; nr_to_reclaim is set here, and nr_scanned
 *                 is read for the return value
 * @nr_attempted:  incremented by sc->nr_to_reclaim when reclaim is attempted
 *
 * Returns true if the zone was already balanced (no reclaim performed) or
 * if at least sc->nr_to_reclaim pages were scanned. The caller uses this to
 * decide whether the scanning priority needs to be raised.
 */
static bool kswapd_shrink_zone(struct zone *zone,
                   int classzone_idx,
                   struct scan_control *sc,
                   unsigned long *nr_attempted)
{
    unsigned long gap;
    bool bh_pressure;
    int check_order = sc->order;

    /*
     * Reclaim above the high watermark: the target for this pass is the
     * larger of SWAP_CLUSTER_MAX and the zone's high watermark.
     */
    sc->nr_to_reclaim = max(SWAP_CLUSTER_MAX, high_wmark_pages(zone));

    /*
     * Kswapd reclaims only single pages with compaction enabled. Trying
     * too hard to reclaim until contiguous free pages have become
     * available can hurt performance by evicting too much useful data
     * from memory. Do not reclaim more than needed for compaction: if
     * compaction could already run, test the watermarks at order-0.
     */
    if (IS_ENABLED(CONFIG_COMPACTION) && sc->order) {
        if (compaction_suitable(zone, sc->order, 0, classzone_idx)
                            != COMPACT_SKIPPED)
            check_order = 0;
    }

    /*
     * We put equal pressure on every zone, unless one zone has way too
     * many pages free already. The "too many pages" is defined as the
     * high wmark plus a "gap" where the gap is either the low
     * watermark or 1% of the zone, whichever is smaller. The gap makes
     * the pre-reclaim balance test slightly stricter than the plain high
     * watermark.
     */
    gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
            zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));

    /*
     * If there is no low memory pressure or the zone is balanced then no
     * reclaim is necessary. Low memory pressure here means buffer_heads
     * are over their limit and this is a highmem zone.
     */
    bh_pressure = (buffer_heads_over_limit && is_highmem(zone));
    if (!bh_pressure &&
        zone_balanced(zone, check_order, gap, classzone_idx))
        return true;

    /* Attempt to reclaim pages from this zone. */
    shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);

    /* Account for the number of pages attempted to reclaim */
    *nr_attempted += sc->nr_to_reclaim;

    clear_bit(ZONE_WRITEBACK, &zone->flags);

    /*
     * If a zone reaches its high watermark, consider it to be no longer
     * congested. It's possible there are dirty pages backed by congested
     * BDIs but as pressure is relieved, speculatively avoid congestion
     * waits.
     */
    if (zone_reclaimable(zone) &&
        zone_balanced(zone, check_order, 0, classzone_idx)) {
        clear_bit(ZONE_CONGESTED, &zone->flags);
        clear_bit(ZONE_DIRTY, &zone->flags);
    }

    /*
     * True when enough pages were scanned to cover the target, even if
     * few or none of them were actually reclaimed.
     */
    return sc->nr_scanned >= sc->nr_to_reclaim;
}

页面分配路径page allocator和页面回收路径kswapd之间有很多交互的地方,如下图:

14.3 balance_pgdat函数

  • 当页面分配路径page allocator在低水位中分配内存失败时,会唤醒kswapd内核线程,把order和preferred_zone传递给kswapd,这两个参数是他们之间的纽带。

  • 页面分配路径page allocator和页面回收路径kswapd在扫描zone时的方向是相反的,页面分配路径page allocator从ZONE_HIGHMEM往ZONE_NORMAL方向扫描zone,kswapd则相反。

  • 如何判断kswapd应该停止页面回收呢?一个重要的条件是从zone_normal到preferred_zone处于平衡状态时,那么就认为这个内存节点处于平衡状态,可以停止页面回收。

  • 页面分配路径page allocator和页面回收路径kswapd采用的zone水位标准不同:page allocator采用低水位,即在低水位下无法分配内存时就唤醒kswapd;而kswapd判断是否停止页面回收时采用的是高水位。

上一篇:Codeforces 1158 F. Density of subarrays(找性质+分段dp)


下一篇:HttpRunner 3.x (五):variables变量的声明和引用