linux-3.2.36内核启动2-setup_arch中的内存初始化1(arm平台 分析高端内存和初始化memblock)【转】

转自:http://blog.csdn.net/tommy_wxie/article/details/17093307

上一篇微博留下了这几个函数,现在我们来分析它们

        sanity_check_meminfo();

        arm_memblock_init(&meminfo, mdesc);

        paging_init(mdesc);

        request_standard_resources(mdesc);

 

在上一微博有展现根据启动参数初始化meminfo,记录了物理内存的开始和大小

       sanity_check_meminfo();

有mmu的情况下这个函数才有意义,初始化高端内存,首先内核要选上这个

KernelFeatures下的

[*]High Memory Support

arch/arm/include/asm/setup.h

/*
 * Maximum number of physical memory banks held in struct meminfo.
 * EP93xx builds use 16; every other platform (including the Samsung
 * platform discussed in this article) uses 8.
 */
#ifdef CONFIG_ARCH_EP93XX
# define NR_BANKS 16
#else
# define NR_BANKS 8
#endif

 

/*
 * Description of one bank of physical memory.
 */
struct membank {
        phys_addr_t start;      /* physical start address of the bank */
        unsigned long size;     /* size of the bank */
        unsigned int highmem;   /* 1 if the bank is high memory */
};

 

struct meminfo {

        int nr_banks;

        struct membankbank[NR_BANKS];

};

extern struct meminfo meminfo;

我们现在的函数就是初始化meminfo这个全局变量

高端内存

    Linux内核的地址空间是3G~4G。假如说机器的内存为512M,那么内存的物

    理地址范围是:0~512M,而映射到内核空间的范围是3G~3G+512M(可以叫low memory).

    而其余的空间都是高端内存的范围,即:3G+512M~4G,但是为了避免越界等安全问题

    的考虑,高端内存又离开了低端内存8M空间,即从3G+512M+8M空间开始。linux内核又规定,高端内存至少为128M,即假如物理内存为1G,那么高端内存就是从3G+896M~4G,即低端内存的最大地址:0xC0000000+896M,实际:0xC0000000+x(内存size)

简单举个例子,假设你有2G内存,而内核只有1G不能全部做线性映射,内核就会把前896M用于RAM线性映射,后128M可以通过更改映射关系访问剩下的内存。有三种方法:永久内核映射,临时映射,非连续内存分配(这些以后写关于内存管理的文章时再分析)。

没有全部贴

void __init sanity_check_meminfo(void)

{

        int i, j, highmem= 0;

 

        //wxl add

        printk(KERN_NOTICE"vmalloc_min = %lx\n", vmalloc_min);

打印结果

vmalloc_min = ee000000

vmalloc_min = (void *)(VMALLOC_END - SZ_128M);

arch/arm/mach-s3c2410/include/mach/vmalloc.h
#define VMALLOC_END 0xF6000000UL

 

0xF6000000-0x8000000=0xEE000000

3808M

 

        for (i = 0, j = 0;i < meminfo.nr_banks; i++) {

                structmembank *bank = &meminfo.bank[j];

                *bank =meminfo.bank[i];

 

#ifdef CONFIG_HIGHMEM

                __va()是把物理地址转换为虚拟地址

#define __virt_to_phys(x)      ((x) - PAGE_OFFSET + PHYS_OFFSET)

#define __phys_to_virt(x)      ((x) - PHYS_OFFSET + PAGE_OFFSET)

#define __va(x)                ((void *)__phys_to_virt((unsigned long)(x)))

下面的条件告诉了我们高端地址范围,大于等于vmalloc_min的好理解;小于PAGE_OFFSET说明__va()换算出的虚拟地址已经溢出回绕,这样的bank同样无法线性映射,所以也算高端内存

                if(__va(bank->start) >= vmalloc_min ||

                   __va(bank->start) < (void *) PAGE_OFFSET)

                       highmem = 1;

                //wxl add

                printk(KERN_NOTICE "start:bank->start = %lx bank->size = %lx __va = %lx highmem = %d\n",(unsigned long)bank->start, (unsigned long)bank->size, (unsignedlong)__va(bank->start), highmem);

打印结果

start: bank->start = 30000000 bank->size = 4000000 __va =c0000000 highmem = 0

bank->start bank->size就是上一篇微博提到的

               bank->highmem = highmem;

 

                /*

                 * Splitthose memory banks which are partially overlapping

                 * thevmalloc area greatly simplifying things later.

                 */

                假设__va(bank->start) < vmalloc_min;它的大小可能会超过低端内存,也就是起始地址在低端,结束地址超过低端范围,那么就要把它分开,你可以简单看看代码

                if(__va(bank->start) < vmalloc_min &&

                   bank->size > vmalloc_min - __va(bank->start)) {

                        if(meminfo.nr_banks >= NR_BANKS) {

                               printk(KERN_CRIT "NR_BANKS too low, "

                                                 "ignoringhigh memory\n");

                        }else {

                               memmove(bank + 1, bank,

                                       (meminfo.nr_banks - i) * sizeof(*bank));

                               meminfo.nr_banks++;

                               i++;

                               bank[1].size -= vmalloc_min - __va(bank->start);

                               bank[1].start = __pa(vmalloc_min - 1) + 1;

                               bank[1].highmem = highmem = 1;

                               j++;

                        }

                       bank->size = vmalloc_min - __va(bank->start);

                }

                //wxl add

               printk(KERN_NOTICE "end: bank->start = %lx bank->size =%lx\n", (unsigned long)bank->start, (unsigned long)bank->size);

打印结果

end: bank->start = 30000000 bank->size = 4000000

 

#else

……

#endif

                重设低端内存限制

                if(!bank->highmem && bank->start + bank->size > lowmem_limit)

                        lowmem_limit =bank->start + bank->size;

 

                j++;

        }

……

 

 arm_memblock_init(&meminfo, mdesc);

        在此处按地址数据从小到大排序meminfo中的数据,并初始化全局的memblock数据。

void __init arm_memblock_init(struct meminfo *mi, structmachine_desc *mdesc)

{

        int i;

 

       sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]),meminfo_cmp, NULL);排序不细看了,而且我的就一个bank

 

        memblock_init();

这个就是对memblock变量初始化,该赋初值的赋初值,该清零的清零。说一点

memblock里有个memory是struct memblock_type

 

/* One address range tracked by memblock. */
struct memblock_region {

        phys_addr_t base;       /* start address of the range */

        phys_addr_t size;       /* length of the range */

};

 

/*
 * A table of memblock regions. The article uses one instance for
 * memblock.memory and another for memblock.reserved.
 */
struct memblock_type {
        unsigned long cnt;      /* number of regions */
        unsigned long max;      /* size of the allocated array */
        struct memblock_region *regions;        /* the region array itself */
};

 

初始化

static struct memblock_regionmemblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;

static struct memblock_regionmemblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;

memblock.memory.max = INIT_MEMBLOCK_REGIONS; 值为128

memblock.memory.cnt = 1;

memblock.memory.regions[0].base = 0;

memblock.memory.regions[0].size = 0;

memblock.memory.regions[INIT_MEMBLOCK_REGIONS].base =MEMBLOCK_INACTIVE;

MEMBLOCK_INACTIVE为0x44c9e71bUL

 

memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE;

#define MEMBLOCK_ALLOC_ANYWHERE      (~(phys_addr_t)0)这样看你是多少位系统了,不过我们只要知道是anywhere

memblock还有个reserved,和memory初始化的值一样。

 

        for (i = 0; i <mi->nr_banks; i++)

               memblock_add(mi->bank[i].start, mi->bank[i].size);

/*
 * Add the range starting at base, size long, to memblock.memory.
 * Returns whatever memblock_add_region() returns.
 *
 * On the author's board the single call is made with
 * base = 0x30000000 and size = 0x4000000.
 */
long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
        return memblock_add_region(&memblock.memory, base, size);
}

 

static long __init_memblock memblock_add_region(structmemblock_type *type,

                                               phys_addr_t base, phys_addr_t size)

{

        phys_addr_t end =base + size;某bank的结束地址

        int i, slot = -1;

 

        /* First try andcoalesce this MEMBLOCK with others */

        for (i = 0; i <type->cnt; i++) {

                structmemblock_region *rgn = &type->regions[i];

                phys_addr_t rend = rgn->base +rgn->size;

 

                /* Exit ifthere's no possible hits */

                if(rgn->base > end || rgn->size == 0)region是按base排序的:当前region的起点已经在新区域之后,或者遇到size为0的空region,就不可能再有重叠,退出循环

                       break;

 

                /* Checkif we are fully enclosed within an existing

                 * block

                 */

                if(rgn->base <= base && rend >= end)检查新区域是否已完全包含在当前block范围内,是的话什么都不用做

                       return 0;

 

                /* Checkif we overlap or are adjacent with the bottom

                 * of a block.

                 */

                if (base< rgn->base && end >= rgn->base) {

                        /*If we can't coalesce, create a new block */

                        if(!memblock_memory_can_coalesce(…这个函数一定返回1所以省去,下同

                       

                        /*We extend the bottom of the block down to our

                         *base

                        */

                       rgn->base = base;

                       rgn->size = rend - base;

 

                        /* Return if we have nothingelse to allocate

                         *(fully coalesced)

                        */

                        if(rend >= end)

                               return 0;

 

                        /*We continue processing from the end of the

                         *coalesced block.

                        */

                       base = rend;

                       size = end - base;

                        上面这一段就是把与block底部重叠(或相邻)的部分并入该block,再从block末尾继续处理剩余部分,

                }

 

                /* Now check if we overlap or areadjacent with the

                 * top ofa block

                 */

                去除顶部的重叠区

                if (base<= rend && end >= rend) {

                        /*If we can't coalesce, create a new block */

                        if(!memblock_memory_can_coalesce(…

 

                        size += (base - rgn->base);

                       base = rgn->base;

                       memblock_remove_region(type, i--);

        for (i = r; i < type->cnt - 1; i++) {

               type->regions[i].base = type->regions[i + 1].base;

               type->regions[i].size = type->regions[i + 1].size;

        }

        type->cnt--;

        有重叠说明连续的,就把它合并到一起,

                }

        }

 

        /* If the array isempty, special case, replace the fake

         * filler regionand return

         */

        if ((type->cnt== 1) && (type->regions[0].size == 0)) {

我的平台现在调用会执行到这

               type->regions[0].base = base; 0x30000000

               type->regions[0].size = size; 0x4000000

                return 0;

        }

 

 new_block:新的block

        /* If we are outof space, we fail. It's too late to resize the array

         * but then thisshouldn't have happened in the first place.

    */

        if(WARN_ON(type->cnt >= type->max))超过最大就返回

                return -1;

 

        /* Couldn'tcoalesce the MEMBLOCK, so add it to the sorted table. */

        不能合并我们按顺序存到regions中

        for (i =type->cnt - 1; i >= 0; i--) {

                if (base< type->regions[i].base) {

                       type->regions[i+1].base = type->regions[i].base;

                       type->regions[i+1].size = type->regions[i].size;

                } else {

                       type->regions[i+1].base = base;

                       type->regions[i+1].size = size;

                       slot = i + 1;

                       break;

                }

        }

        if (base <type->regions[0].base) {

               type->regions[0].base = base;

               type->regions[0].size = size;

                slot = 0;

        }

        type->cnt++;

 

        /* The array isfull ? Try to resize it. If that fails, we undo

         * our allocationand return an error

         */

        满了尝试重定义大小

        if (type->cnt== type->max && memblock_double_array(type)) {

               BUG_ON(slot < 0);

               memblock_remove_region(type, slot);

                return -1;

        }

 

        return 0;

}

 

看了源码其实就是把之前的bank信息存到memblock.memory.regions中。

 

        /* Register thekernel text, kernel data and initrd with memblock. */

Kernel XIP 原理如下,内核映像在Flash 设备上执行以后,只把映像中要读写的.data和.bss 拷贝到SDRAM 主存中,同时设置好系统的MMU,内核运行过程中,代码段.text 指向Flash 空间,.data 和.bss 指向SDRAM 主存空间。相对于全映射的执行方式,系统节省了解压缩和拷贝代码段的时间,节省了代码段占用的RAM 主存空间。

我的没有用这个东西。不过从下面你可以看到XIP没有把text存入memblock

#ifdef CONFIG_XIP_KERNEL

       memblock_reserve(__pa(_sdata), _end - _sdata);

#else

        memblock_reserve(__pa(_stext),_end - _stext);

在System.map下

 

c00081e0 T _stext

c0318000 D _sdata

c0367db8 A _end

__pa(_stext) = 0x300081e0

 

#endif

/*
 * Record the range starting at base, size bytes long, in
 * memblock.reserved. Passing a zero size trips BUG_ON().
 * Returns whatever memblock_add_region() returns.
 */
long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
{
        struct memblock_type *_rgn = &memblock.reserved;

        BUG_ON(0 == size);

        /* Same insert/merge routine that memblock_add() uses. */
        return memblock_add_region(_rgn, base, size);
}

通过上面的我们可以算出,不过还是加个打印吧

       //wxl add

       printk(KERN_NOTICE "memory:\n");

       for (i = 0; i <memblock.memory.cnt; i++)

       {

           printk(KERN_NOTICE"regions[%d] base = %lx size = %lx\n", i, (unsignedlong)memblock.memory.regions[i].base, (unsigned long)memblock.memory.regions[i].size);

       }

 

       printk(KERN_NOTICE"reserved:\n");

       for (i = 0; i <memblock.reserved.cnt; i++)

       {

           printk(KERN_NOTICE"regions[%d] base = %lx size = %lx\n", i, (unsignedlong)memblock.reserved.regions[i].base, (unsigned long)memblock.reserved.regions[i].size);

       }

打印结果

memory:

regions[0] base = 30000000 size = 4000000

reserved:

regions[0] base = 300081e0 size = 35fbd8

用上面计算也是这个结果,到此reserved应该就是记录内核的大小,不过下面它还要做些事

 

下面和initrd的使用有关

为了能够使用RAM disk你的内核必须要支持RAMdisk,即:在编译内核时,要选中RAMdisk support这一选项,会在配置文件中定义CONFIG_BLK_DEV_RAM。
       为了让内核有能力在内核加载阶段就能装入RAMDISK,并运行其中的内容,要选中initial RAM disk(initrd) support 选项,会在配置文件中定义CONFIG_BLK_DEV_INITRD。

 

http://wenku.baidu.com/view/dc6dc785bceb19e8b8f6baba.html

此链接是一篇关于initramfs和initrd的文章,有兴趣看看

Initrd是一个临时文件系统。在某些没有存储设备的嵌入式系统中,initrd是永久根文件系统

它就是个文件系统,不过很小

initrd 中包含了实现这个目标所需要的目录和可执行程序的最小集合,例如将内核模块加载到内核中所使用的insmod 工具。

initrd 映像中包含了支持 Linux系统两阶段引导过程所需要的必要可执行程序和系统文件。

咱们看看它有什么

http://blog.163.com/dongfeng_114/blog/static/4664357420112452442211/

查看initrd的内容方法

我的是用cpio方法的,下面是我的pc中initrd的内容

[root@localhost tempfs]# ls

bin  dev  etc init  initrd.img  lib proc  sbin  sys sysroot

[root@localhost tempfs]# ls dev/

console  ptmx  ram1   tty   tty10  tty2 tty5  tty8   ttyS1 zero

mapper   ram   rtc    tty0  tty11  tty3 tty6  tty9   ttyS2

null     ram0  systty tty1  tty12  tty4 tty7  ttyS0  ttyS3

[root@localhost tempfs]# ls sbin/

dmraid  insmod  kpartx lvm  modprobe  nash

最后说一下uboot的bootargs 要设置initrd=addr,[Size]M

大家看看自己思考思考吧,我们看下面的内存处理

 

#ifdef CONFIG_BLK_DEV_INITRD

先说phys_initrd_size,它初始化定义是0

/*
 * Handler for the ATAG_INITRD boot tag.
 *
 * Warns that the tag is deprecated, then stores the initrd location
 * from the tag: the size is copied as-is and the start address is
 * converted with __virt_to_phys(). Always returns 0.
 */
static int __init parse_tag_initrd(const struct tag *tag)
{
        printk(KERN_WARNING
               "ATAG_INITRD is deprecated; please update your bootloader.\n");

        phys_initrd_size = tag->u.initrd.size;
        phys_initrd_start = __virt_to_phys(tag->u.initrd.start);

        return 0;
}

 

__tagtable(ATAG_INITRD, parse_tag_initrd);

上面的东西看过我上一篇《linux内核启动1》应该不会陌生吧,就是把bootloader传来的ATAG_INITRD这个tag里的start和size赋值到phys_initrd_start,phys_initrd_size;

        if(phys_initrd_size &&

           !memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) {

memblock_is_region_memory函数就是和memblock.memory比较看在不在此内存里面

                pr_err("INITRD:0x%08lx+0x%08lx is not a memory region - disabling initrd\n",

                      phys_initrd_start, phys_initrd_size);

如果你看到这个打印,就是initrd不在可用内存范围内

               phys_initrd_start = phys_initrd_size = 0;

        }

        if(phys_initrd_size &&

           memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) {

memblock_is_region_reserved当然是和memblock.reserved比较

                pr_err("INITRD:0x%08lx+0x%08lx overlaps in-use memory region - disabling initrd\n",

                      phys_initrd_start, phys_initrd_size);

如果你看到这个打印,就是initrd和内核重叠了

               phys_initrd_start = phys_initrd_size = 0;

        }

上面的两个判断就是在可用内存内且不能和内核重叠

        if(phys_initrd_size) {

               memblock_reserve(phys_initrd_start, phys_initrd_size);上面已解释过

 

                /* Nowconvert initrd to virtual addresses */

               initrd_start = __phys_to_virt(phys_initrd_start);转换为虚拟地址

                initrd_end= initrd_start + phys_initrd_size;

        }

#endif

reserved又记录initrd信息

       arm_mm_memblock_reserve();

void __init arm_mm_memblock_reserve(void)

{

......

       memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE);

……

存储页表空间

arm页表地址

c0004000 A swapper_pg_dir

arch/arm/kernel/head.S下

.equ    swapper_pg_dir,KERNEL_RAM_VADDR - PG_DIR_SIZE

#define PG_DIR_SIZE    0x4000

KERNEL_RAM_VADDR是0xc0008000(虚拟地址,对应的物理地址是0x30008000)

       arm_dt_memblock_reserve();这个保存设备树的启动参数,不细看

 

        /* reserve anyplatform specific memblock areas */

        if(mdesc->reserve)对应平台自定义的block区

               mdesc->reserve();

 

       memblock_analyze();更新memblock中memory_size的值

       memblock_dump_all();这个就是打印reserved和memory,和我中间加的打印东西差不多

 

【作者】张昺华
【新浪微博】 张昺华--sky
【twitter】 @sky2030_
【facebook】 张昺华 zhangbinghua
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文连接,否则保留追究法律责任的权利.
上一篇:Oracle学习——第二讲(函数)


下一篇:技术实践第四期|解读移动开发者日常-性能监控平台应用