转自:http://blog.csdn.net/boymax2/article/details/52550197
版权声明:本文为博主原创文章,未经博主允许不得转载。 Magenta内核支持虚拟地址的配置,依赖于cpu内的mmu模块。 下面会从以下几个方面对Magenta内核内存管理方面的代码进行分析: 1、mmu初始化,也就是硬件mmu的初始化,以底层寄存器操作为主,由汇编实现 2、pmm初始化,也就是代码中物理内存结构的初始化 3、vmm初始化,也就是代码中虚拟内存结构的初始化 mmu初始化 mmu初始化的代码由汇编完成,其中主要涉及了以下几个结构 转换表(translation table):内存映射表,其定义位于c代码中 kernel/arch/arm/arm/mmu.c [cpp] view plain copy uint32_t arm_kernel_translation_table[TT_ENTRY_COUNT] __ALIGNED(16384) __SECTION(".bss.prebss.translation_table"); 以及初始化的内存映射关系,以qemu-virt平台为例 kernel/platform/qemu-virt/platform.c [cpp] view plain copy struct mmu_initial_mapping mmu_initial_mappings[] = { /* all of memory */ { .phys = MEMORY_BASE_PHYS, // 内存物理基地址 .virt = KERNEL_BASE, // 内存虚拟基地址 .size = MEMORY_APERTURE_SIZE,// 虚拟内存大小 .flags = 0, .name = "memory" }, /* 1GB of peripherals */ { .phys = PERIPHERAL_BASE_PHYS, // 外设物理基地址 .virt = PERIPHERAL_BASE_VIRT, // 外设虚拟基地址 .size = PERIPHERAL_BASE_SIZE, // 虚拟内存大小 .flags = MMU_INITIAL_MAPPING_FLAG_DEVICE, .name = "peripherals" }, /* null entry to terminate the list */ { 0 } }; 这两个结构都会在之后的汇编代码中使用。 mmu初始化的汇编代码位于内核的启动文件中,以arm32为例 自己对arm汇编不是很熟悉,在读汇编代码时花费了比较多的时间,如有错误希望能指正出来 启动文件中与mmu相关的代码已经提取出来 在其中主要涉及到的操作为以下几个: 1、重置mmu相关寄存器 2、计算物理地址相对虚拟地址的偏移 3、将转换表所在的空间清零 4、遍历mmu_initial_mappings结构,计算后写入转换表 5、设置mmu相关寄存器 6、跳转至c代码 kernel/arch/arm/arm/start.S [plain] view plain copy #include <asm.h> #include <arch/arm/cores.h> #include <arch/arm/mmu.h> #include <kernel/vm.h> .section ".text.boot" .globl _start _start: b platform_reset b arm_undefined b arm_syscall b arm_prefetch_abort b arm_data_abort b arm_reserved b arm_irq b arm_fiq #if WITH_SMP b arm_reset #endif .weak platform_reset platform_reset: /* Fall through for the weak symbol */ // arm复位处理程序 .globl arm_reset arm_reset: /* do some early cpu setup */ // 读SCTLR寄存器,手册P1711 mrc p15, 0, r12, c1, c0, 0 /* i/d cache disable, mmu disabled */ // cache位与mmu位置0 bic r12, #(1<<12) bic r12, #(1<<2 | 1<<0) #if WITH_KERNEL_VM /* enable caches so atomics and spinlocks work */ // 仅将cache位(I-cache、D-cache)置1,mmu位仍为0 orr r12, r12, #(1<<12) orr r12, r12, #(1<<2) #endif // WITH_KERNEL_VM // 
写SCTLR寄存器 mcr p15, 0, r12, c1, c0, 0 /* calculate the physical offset from our eventual virtual location */ // 计算物理地址相对虚拟地址的偏移,用于之后的转换 .Lphys_offset: ldr r4, =.Lphys_offset adr r11, .Lphys_offset sub r11, r11, r4 ... #if ARM_WITH_MMU .Lsetup_mmu: /* set up the mmu according to mmu_initial_mappings */ /* load the base of the translation table and clear the table */ // 获取转换表地址 ldr r4, =arm_kernel_translation_table // 获取转换表物理地址 add r4, r4, r11 /* r4 = physical address of translation table */ mov r5, #0 mov r6, #0 /* walk through all the entries in the translation table, setting them up */ // 遍历转换表结构清零 0: str r5, [r4, r6, lsl #2] add r6, #1 cmp r6, #4096 bne 0b /* load the address of the mmu_initial_mappings table and start processing */ // 获取初始映射地址 ldr r5, =mmu_initial_mappings // 获取初始映射物理地址 add r5, r5, r11 /* r5 = physical address of mmu initial mapping table */ // 初始映射遍历绑定至转换表 // 转换表的绑定 转换表中元素的高12位为物理基地址下标,低20位为mmu相关flag .Linitial_mapping_loop: // 把结构体加载到各个通用寄存器中 ldmia r5!, { r6-r10 } /* r6 = phys, r7 = virt, r8 = size, r9 = flags, r10 = name */ /* round size up to 1MB alignment */ // 上调size对齐1MB ubfx r10, r6, #0, #20 add r8, r8, r10 add r8, r8, #(1 << 20) sub r8, r8, #1 /* mask all the addresses and sizes to 1MB boundaries */ // 物理地址 虚拟地址 大小 右移20位 取高12位 lsr r6, #20 /* r6 = physical address / 1MB */ lsr r7, #20 /* r7 = virtual address / 1MB */ lsr r8, #20 /* r8 = size in 1MB chunks */ /* if size == 0, end of list */ // 循环边界判断 cmp r8, #0 beq .Linitial_mapping_done /* set up the flags */ // 设置mmu相关flag,放置在r10 ldr r10, =MMU_KERNEL_L1_PTE_FLAGS teq r9, #MMU_INITIAL_MAPPING_FLAG_UNCACHED ldreq r10, =MMU_INITIAL_MAP_STRONGLY_ORDERED beq 0f teq r9, #MMU_INITIAL_MAPPING_FLAG_DEVICE ldreq r10, =MMU_INITIAL_MAP_DEVICE /* r10 = mmu entry flags */ 0: // 计算translation_table元素的值 // r10:mmu相关flag r6:物理地址高12位 // r12 = r10 | (r6 << 20) // 高12位为物理地址(1MB section基址),低20位为mmu相关flag orr r12, r10, r6, lsl #20 /* r12 = phys addr | flags */ /* store into appropriate translation table entry */ // 
r4:转换表物理基地址 r7:虚拟地址对应的section // r12 -> [r4 + r7 << 2] str r12, [r4, r7, lsl #2] /* loop until we're done */ // 准备下一个转换表元素的填充 add r6, #1 add r7, #1 subs r8, #1 bne 0b b .Linitial_mapping_loop .Linitial_mapping_done: ... /* set up the mmu */ bl .Lmmu_setup #endif // WITH_KERNEL_VM ... // 跳转至c程序 bl lk_main b . #if WITH_KERNEL_VM /* per cpu mmu setup, shared between primary and secondary cpus args: r4 == translation table physical r8 == final translation table physical (if using trampoline) */ // 设置mmu相关寄存器 // r4:转换表物理基地址 // mmu相关寄存器 手册P1724 .Lmmu_setup: /* Invalidate TLB */ mov r12, #0 mcr p15, 0, r12, c8, c7, 0 isb /* Write 0 to TTBCR */ // ttbcr写0 mcr p15, 0, r12, c2, c0, 2 isb /* Set cacheable attributes on translation walk */ // 宏MMU_TTBRx_FLAGS为 (1 << 3) | (1 << 6) orr r12, r4, #MMU_TTBRx_FLAGS /* Write ttbr with phys addr of the translation table */ // 写入ttbr0 mcr p15, 0, r12, c2, c0, 0 isb /* Write DACR */ // 写DACR(域访问控制寄存器),将domain 0设为client模式 mov r12, #0x1 mcr p15, 0, r12, c3, c0, 0 isb /* Read SCTLR into r12 */ // 读SCTLR寄存器,手册P1711 mrc p15, 0, r12, c1, c0, 0 /* Disable TRE/AFE */ // 禁用TRE和AFE标志位 bic r12, #(1<<29 | 1<<28) /* Turn on the MMU */ // MMU使能标志位 orr r12, #0x1 /* Write back SCTLR */ // 写入SCTLR // MMU打开 mcr p15, 0, r12, c1, c0, 0 isb /* Jump to virtual code address */ // 跳转 ldr pc, =1f 1: ... /* Invalidate TLB */ mov r12, #0 mcr p15, 0, r12, c8, c7, 0 isb /* assume lr was in physical memory, adjust it before returning */ // 计算跳转点的虚拟地址,跳转,之后会调用lk_main sub lr, r11 bx lr #endif ... 
硬件层的内存管理相关的初始化基本完成后,会跳转到c代码 位于kernel/top/main.c 其中有关内存管理的函数调用顺序为: 1、pmm_add_arena 将物理内存加入pmm结构 2、vm_init_preheap 堆初始化前的准备工作(钩子) 3、heap_init 堆的初始化 4、vm_init_postheap 堆初始化后的工作(钩子) 5、arm_mmu_init mmu相关的调整 首先要完成pmm初始化工作 pmm初始化主要分为以下几步: 1、通过fdt库从bootloader中获取物理内存的长度 2、在pmm中加入物理内存 3、标记fdt结构的空间 4、标记bootloader相关的空间 pmm中比较重要的一个结构体,pmm_arena_t代表着一块物理内存的抽象 kernel/include/kernel/vm.h [cpp] view plain copy typedef struct pmm_arena { struct list_node node; // 节点,物理内存链表 const char* name; // 名称 uint flags; uint priority; paddr_t base; // 物理内存基地址 size_t size; // 物理内存长度 size_t free_count; // 空闲的页数 struct vm_page* page_array; // 页结构数组 struct list_node free_list; // 节点,该内存中空闲空间的链表 } pmm_arena_t; 接着以qemu-virt的platform为例,分析pmm初始化的过程 kernel/platform/qemu-virt.c [cpp] view plain copy // 全局物理内存结构体 static pmm_arena_t arena = { .name = "ram", .base = MEMORY_BASE_PHYS, .size = DEFAULT_MEMORY_SIZE, .flags = PMM_ARENA_FLAG_KMAP, }; ... // 该函数为平台的早期初始化,在内核启动时调用 void platform_early_init(void) { ... /* look for a flattened device tree just before the kernel */ // 获取fdt结构 const void *fdt = (void *)KERNEL_BASE; int err = fdt_check_header(fdt); if (err >= 0) { /* walk the nodes, looking for 'memory' and 'chosen' */ int depth = 0; int offset = 0; for (;;) { offset = fdt_next_node(fdt, offset, &depth); if (offset < 0) break; /* get the name */ const char *name = fdt_get_name(fdt, offset, NULL); if (!name) continue; /* look for the properties we care about */ // 从fdt中查找到内存信息 if (strcmp(name, "memory") == 0) { int lenp; const void *prop_ptr = fdt_getprop(fdt, offset, "reg", &lenp); if (prop_ptr && lenp == 0x10) { /* we're looking at a memory descriptor */ //uint64_t base = fdt64_to_cpu(*(uint64_t *)prop_ptr); // 获取内存长度 uint64_t len = fdt64_to_cpu(*((const uint64_t *)prop_ptr + 1)); /* trim size on certain platforms */ #if ARCH_ARM // 如果是32位arm,只使用内存前1GB if (len > 1024*1024*1024U) { len = 1024*1024*1024; /* only use the first 1GB on ARM32 */ printf("trimming memory to 1GB\n"); } #endif /* set the size in 
the pmm arena */ // 保存内存长度 arena.size = len; } } else if (strcmp(name, "chosen") == 0) { ... } } } /* add the main memory arena */ // 将该内存区域加入到pmm中 pmm_add_arena(&arena); /* reserve the first 64k of ram, which should be holding the fdt */ // 标记fdt区域 pmm_alloc_range(MEMBASE, 0x10000 / PAGE_SIZE, NULL); // 标记bootloader_ramdisk区域 platform_preserve_ramdisk(); ... } 内核在接下来初始化堆之前会在内存中构造出一个VmAspace对象,其代表的是内核空间的抽象 kernel/kernel/vm/vm.cpp [cpp] view plain copy void vm_init_preheap(uint level) { LTRACE_ENTRY; // allow the vmm a shot at initializing some of its data structures // 构造代表内核空间的VmAspace对象 VmAspace::KernelAspaceInitPreHeap(); // mark all of the kernel pages in use LTRACEF("marking all kernel pages as used\n"); // 标记内核代码所用内存 mark_pages_in_use((vaddr_t)&_start, ((uintptr_t)&_end - (uintptr_t)&_start)); // mark the physical pages used by the boot time allocator // 标记boot time allocator代码所用内存 if (boot_alloc_end != boot_alloc_start) { LTRACEF("marking boot alloc used from 0x%lx to 0x%lx\n", boot_alloc_start, boot_alloc_end); mark_pages_in_use(boot_alloc_start, boot_alloc_end - boot_alloc_start); } } kernel/kernel/vm/vm_aspace.cpp [cpp] view plain copy void VmAspace::KernelAspaceInitPreHeap() { // the singleton kernel address space // 构造一个内核空间单例,因为这个函数只会在启动时调用,所以这个对象是单例 static VmAspace _kernel_aspace(KERNEL_ASPACE_BASE, KERNEL_ASPACE_SIZE, VmAspace::TYPE_KERNEL, "kernel"); // 初始化 auto err = _kernel_aspace.Init(); ASSERT(err >= 0); // save a pointer to the singleton kernel address space // 保存单例指针 VmAspace::kernel_aspace_ = &_kernel_aspace; } VmAspace::VmAspace(vaddr_t base, size_t size, uint32_t flags, const char* name) : base_(base), size_(size), flags_(flags) { DEBUG_ASSERT(size != 0); DEBUG_ASSERT(base + size - 1 >= base); Rename(name); LTRACEF("%p '%s'\n", this, name_); } status_t VmAspace::Init() { DEBUG_ASSERT(magic_ == MAGIC); LTRACEF("%p '%s'\n", this, name_); // intialize the architectually specific part // 标记为内核的空间 bool is_high_kernel = (flags_ & TYPE_MASK) == 
TYPE_KERNEL; uint arch_aspace_flags = is_high_kernel ? ARCH_ASPACE_FLAG_KERNEL : 0; // 调用mmu相关的函数 return arch_mmu_init_aspace(&arch_aspace_, base_, size_, arch_aspace_flags); } kernel/arch/arm/arm/mmu.c [cpp] view plain copy status_t arch_mmu_init_aspace(arch_aspace_t *aspace, vaddr_t base, size_t size, uint flags) { LTRACEF("aspace %p, base 0x%lx, size 0x%zx, flags 0x%x\n", aspace, base, size, flags); DEBUG_ASSERT(aspace); DEBUG_ASSERT(aspace->magic != ARCH_ASPACE_MAGIC); /* validate that the base + size is sane and doesn't wrap */ DEBUG_ASSERT(size > PAGE_SIZE); DEBUG_ASSERT(base + size - 1 > base); // 初始化内核空间中页的链表 list_initialize(&aspace->pt_page_list); aspace->magic = ARCH_ASPACE_MAGIC; if (flags & ARCH_ASPACE_FLAG_KERNEL) { // 设置结构内相关参数,其中转换表的物理内存通过vaddr_to_paddr获取 // 该函数不详细分析了,实质就是通过转换表进行查询得到的物理地址 aspace->base = base; aspace->size = size; aspace->tt_virt = arm_kernel_translation_table; aspace->tt_phys = vaddr_to_paddr(aspace->tt_virt); } else { ... } LTRACEF("tt_phys 0x%lx tt_virt %p\n", aspace->tt_phys, aspace->tt_virt); return NO_ERROR; } 到此内核空间的结构初始化完成 接下来进行内核堆的初始化,Magenta内核中提供了两种堆的实现miniheap以及cmpctmalloc,用户可以自己进行配置。 堆的具体实现方法会在之后进行具体的分析 堆的初始化完成以后,会调用相应的钩子函数,该函数的主要的作用如下: 1、在vmm结构中标记内核已使用的虚拟地址 2、根据内核使用的地址的区域,分别设置内存的保护 [cpp] view plain copy void vm_init_postheap(uint level) { LTRACE_ENTRY; vmm_aspace_t* aspace = vmm_get_kernel_aspace(); // we expect the kernel to be in a temporary mapping, define permanent // regions for those now struct temp_region { const char* name; vaddr_t base; size_t size; uint arch_mmu_flags; } regions[] = { { .name = "kernel_code", .base = (vaddr_t)&__code_start, .size = ROUNDUP((size_t)&__code_end - (size_t)&__code_start, PAGE_SIZE), .arch_mmu_flags = ARCH_MMU_FLAG_PERM_READ | ARCH_MMU_FLAG_PERM_EXECUTE, }, { .name = "kernel_rodata", .base = (vaddr_t)&__rodata_start, .size = ROUNDUP((size_t)&__rodata_end - (size_t)&__rodata_start, PAGE_SIZE), .arch_mmu_flags = ARCH_MMU_FLAG_PERM_READ, }, { .name = "kernel_data", .base = 
(vaddr_t)&__data_start, .size = ROUNDUP((size_t)&__data_end - (size_t)&__data_start, PAGE_SIZE), .arch_mmu_flags = ARCH_MMU_FLAG_PERM_READ | ARCH_MMU_FLAG_PERM_WRITE, }, { .name = "kernel_bss", .base = (vaddr_t)&__bss_start, .size = ROUNDUP((size_t)&__bss_end - (size_t)&__bss_start, PAGE_SIZE), .arch_mmu_flags = ARCH_MMU_FLAG_PERM_READ | ARCH_MMU_FLAG_PERM_WRITE, }, { .name = "kernel_bootalloc", .base = (vaddr_t)boot_alloc_start, .size = ROUNDUP(boot_alloc_end - boot_alloc_start, PAGE_SIZE), .arch_mmu_flags = ARCH_MMU_FLAG_PERM_READ | ARCH_MMU_FLAG_PERM_WRITE, }, }; for (uint i = 0; i < countof(regions); ++i) { temp_region* region = &regions[i]; ASSERT(IS_PAGE_ALIGNED(region->base)); status_t status = vmm_reserve_space(aspace, region->name, region->size, region->base); ASSERT(status == NO_ERROR); status = vmm_protect_region(aspace, region->base, region->arch_mmu_flags); ASSERT(status == NO_ERROR); } // mmu_initial_mappings should reflect where we are now, use it to construct the actual // mappings. We will carve out the kernel code/data from any mappings and // unmap any temporary ones. const struct mmu_initial_mapping* map = mmu_initial_mappings; for (map = mmu_initial_mappings; map->size > 0; ++map) { LTRACEF("looking at mapping %p (%s)\n", map, map->name); // Unmap temporary mappings except where they intersect with the // kernel code/data regions. vaddr_t vaddr = map->virt; LTRACEF("vaddr 0x%lx, virt + size 0x%lx\n", vaddr, map->virt + map->size); while (vaddr != map->virt + map->size) { vaddr_t next_kernel_region = map->virt + map->size; vaddr_t next_kernel_region_end = map->virt + map->size; // Find the kernel code/data region with the lowest start address // that is within this mapping. 
for (uint i = 0; i < countof(regions); ++i) { temp_region* region = &regions[i]; if (region->base >= vaddr && region->base < map->virt + map->size && region->base < next_kernel_region) { next_kernel_region = region->base; next_kernel_region_end = region->base + region->size; } } // If vaddr isn't the start of a kernel code/data region, then we should make // a mapping between it and the next closest one. if (next_kernel_region != vaddr) { status_t status = vmm_reserve_space(aspace, map->name, next_kernel_region - vaddr, vaddr); ASSERT(status == NO_ERROR); if (map->flags & MMU_INITIAL_MAPPING_TEMPORARY) { // If the region is part of a temporary mapping, immediately unmap it LTRACEF("Freeing region [%016lx, %016lx)\n", vaddr, next_kernel_region); status = vmm_free_region(aspace, vaddr); ASSERT(status == NO_ERROR); } else { // Otherwise, mark it no-exec since it's not explicitly code status = vmm_protect_region( aspace, vaddr, ARCH_MMU_FLAG_PERM_READ | ARCH_MMU_FLAG_PERM_WRITE); ASSERT(status == NO_ERROR); } } vaddr = next_kernel_region_end; } } } 以上代码中涉及到的几个函数,这里只做简单的介绍,不具体分析: vmm_reserve_space:在vmm中标记一块虚拟内存,这块虚拟内存抽象为VmRegion类,拥有自己的底层mmu相关的配置 vmm_protect_region:对某VmRegion对应的虚拟内存设置内存保护的相关参数 mmu相关的调整 mmu相关的调整,由内核新建的bootstrap2线程调用arch_init完成 kernel/arch/arm/arm/arch.c [cpp] view plain copy void arch_init(void) { ... 
#if ARM_WITH_MMU /* finish intializing the mmu */ arm_mmu_init(); #endif } kernel/arch/arm/arm/mmu.c [cpp] view plain copy void arm_mmu_init(void) { /* unmap the initial mapings that are marked temporary */ // 解除具有MMU_INITIAL_MAPPING_TEMPORARY标志的内存映射 struct mmu_initial_mapping *map = mmu_initial_mappings; while (map->size > 0) { if (map->flags & MMU_INITIAL_MAPPING_TEMPORARY) { vaddr_t va = map->virt; size_t size = map->size; DEBUG_ASSERT(IS_SECTION_ALIGNED(size)); while (size > 0) { arm_mmu_unmap_l1_entry(arm_kernel_translation_table, va / SECTION_SIZE); va += MB; size -= MB; } } map++; } arm_after_invalidate_tlb_barrier(); #if KERNEL_ASPACE_BASE != 0 /* bounce the ttbr over to ttbr1 and leave 0 unmapped */ // 重新设置mmu相关的寄存器,禁用ttbcr0,将原先ttbr0的映射移动到ttbr1 // ttbr1为内核空间使用的寄存器 uint32_t n = __builtin_clz(KERNEL_ASPACE_BASE) + 1; DEBUG_ASSERT(n <= 7); uint32_t ttbcr = (1<<4) | n; /* disable TTBCR0 and set the split between TTBR0 and TTBR1 */ arm_write_ttbr1(arm_read_ttbr0()); ISB; arm_write_ttbcr(ttbcr); ISB; arm_write_ttbr0(0); ISB; #endif } 至此Magenta内核有关内存管理的初始化完成。
【作者】张昺华
【新浪微博】 张昺华--sky
【twitter】 @sky2030_
【facebook】 张昺华 zhangbinghua
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接,否则保留追究法律责任的权利.