在移植 u-boot 的过程中,曾看过 u-boot 重定位(relocation)部分的实现。当时觉得很好理解,就没有深入研究这个知识点。最近在看华为鸿蒙 OS 在 Cortex-A 平台上的启动实现时再次遇到类似的代码,一时间却看不太懂了,所以花了点时间研究了一下,在这里做个记录;后续有时间再把 u-boot 的实现复盘一下以加深理解。具体的代码如下:
/*
 * Copyright (c) 2013-2019, Huawei Technologies Co., Ltd. All rights reserved.
 * Copyright (c) 2020, Huawei Device Co., Ltd. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 *    conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 *    of conditions and the following disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors may be used
 *    to endorse or promote products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Syntax: GNU as, ARM (A32) state, run through the C preprocessor.
 * Comments must use C-style delimiters or '@'; ';' is a statement
 * separator in GNU as for ARM, not a comment marker.
 */

#define  ASSEMBLY
#include "arch_config.h"
#include "los_vm_boot.h"
#include "los_vm_zone.h"
#include "los_mmu_descriptor_v6.h"
#undef ASSEMBLY


    .global __exc_stack_top
    .global __irq_stack_top
    .global __fiq_stack_top
    .global __svc_stack_top
    .global __abt_stack_top
    .global __undef_stack_top
    .global __exc_stack
    .global __irq_stack
    .global __fiq_stack
    .global __svc_stack
    .global __abt_stack
    .global __undef_stack

    .extern __bss_start
    .extern __bss_end
    .extern hal_clock_initialize_start
    .extern los_bss_init
    .extern _osExceptFiqHdl
    .extern _osExceptAddrAbortHdl
    .extern _osExceptDataAbortHdl
    .extern _osExceptPrefetchAbortHdl
    .extern _osExceptSwiHdl
    .extern _osExceptUndefInstrHdl
    .extern __stack_chk_guard_setup
    .extern g_firstPageTable
    .extern g_mmuJumpPageTable

    .equ MPIDR_CPUID_MASK, 0xffU

    .fpu vfpv4
    .arm

/* param0 is stack bottom, param1 is stack size, r11 hold cpu id */
/* clobbers r0-r3 and lr (calls sp_set) */
.macro EXC_SP_SET param0, param1
    ldr    r1, =\param0
    mov    r0, \param1
    bl     sp_set
.endm

/* param0 is stack top, param1 is stack size, param2 is magic num */
/* clobbers r0-r3 and lr (calls excstack_magic) */
.macro STACK_MAGIC_SET param0, param1, param2
    ldr     r0, =\param0
    mov     r1, \param1
    ldr     r2, =\param2
    bl      excstack_magic
.endm

/* param0 is physical address, param1 virtual address, param2 is sizes, param3 is flag */
/* expects r4 = page table base; loads r6, r7, r8, r10 and calls page_table_build */
.macro PAGE_TABLE_SET param0, param1, param2, param3
    ldr     r6, =\param0
    ldr     r7, =\param1
    ldr     r8, =\param2
    ldr     r10, =\param3
    bl      page_table_build
.endm
    .code   32
    .section ".vectors","ax"

__exception_handlers:
    /*
    *Assumption:  ROM code has these vectors at the hardware reset address.
    *A simple jump removes any address-space dependencies [i.e. safer]
    */
    b   reset_vector
    b   _osExceptUndefInstrHdl
    b   _osExceptSwiHdl
    b   _osExceptPrefetchAbortHdl
    b   _osExceptDataAbortHdl
    b   _osExceptAddrAbortHdl
    b   OsIrqHandler
    b   _osExceptFiqHdl

    /* Startup code which will get the machine into supervisor mode */
    .global reset_vector
    .type   reset_vector,function
reset_vector:
    /* do some early cpu setup: i/d cache disable, mmu disabled */
    mrc     p15, 0, r0, c1, c0, 0
    bic     r0, #(1<<12)
    bic     r0, #(1<<2 | 1<<0)
    mcr     p15, 0, r0, c1, c0, 0

    /*
     * r11: delta of physical address and virtual address.
     * The MMU has just been disabled, so pc holds the physical (load)
     * address. adr is pc-relative, therefore r11 receives the run-time
     * physical address of pa_va_offset. The word stored at that label
     * (".word .") is the label's own link-time address, which is the
     * virtual address the image is linked to run at. Subtracting the two
     * yields pa - va.
     */
    adr     r11, pa_va_offset           /* r11: run-time (physical) address of pa_va_offset */
    ldr     r0, [r11]                   /* r0: link-time (virtual) address of pa_va_offset */
    sub     r11, r11, r0                /* r11 = pa - va */

    /* if we need to relocate to proper location or not */
    adr     r4, __exception_handlers            /* r4: base of load address */
    ldr     r5, =SYS_MEM_BASE                   /* r5: base of physical address */
    subs    r12, r4, r5                         /* r12: delta of load address and physical address */
    beq     reloc_img_to_bottom_done            /* if we load image at the bottom of physical address */

    /* we need to relocate image at the bottom of physical address */
    ldr     r7, =__exception_handlers           /* r7: base of linked address (or vm address) */
    ldr     r6, =__bss_start                    /* r6: end of linked address (or vm address) */
    sub     r6, r7                              /* r6: delta of linked address (or vm address) */
    add     r6, r4                              /* r6: end of load address */

reloc_img_to_bottom_loop:
    ldr     r7, [r4], #4
    str     r7, [r5], #4
    cmp     r4, r6
    bne     reloc_img_to_bottom_loop
    /*
     * pc reads as this instruction + 8, so execution continues at the
     * "sub r11, r11, r12" below, but inside the relocated copy.
     */
    sub     pc, r12
    nop
    sub     r11, r11, r12                       /* r11: eventual address offset */

reloc_img_to_bottom_done:
    ldr     r4, =g_firstPageTable               /* r4: physical address of translation table and clear it */
    add     r4, r4, r11
    bl      page_table_clear

    PAGE_TABLE_SET SYS_MEM_BASE, KERNEL_VMM_BASE, KERNEL_VMM_SIZE, MMU_DESCRIPTOR_KERNEL_L1_PTE_FLAGS
    PAGE_TABLE_SET SYS_MEM_BASE, UNCACHED_VMM_BASE, UNCACHED_VMM_SIZE, MMU_INITIAL_MAP_STRONGLY_ORDERED
    PAGE_TABLE_SET PERIPH_PMM_BASE, PERIPH_DEVICE_BASE, PERIPH_DEVICE_SIZE, MMU_INITIAL_MAP_DEVICE
    PAGE_TABLE_SET PERIPH_PMM_BASE, PERIPH_CACHED_BASE, PERIPH_CACHED_SIZE, MMU_DESCRIPTOR_KERNEL_L1_PTE_FLAGS
    PAGE_TABLE_SET PERIPH_PMM_BASE, PERIPH_UNCACHED_BASE, PERIPH_UNCACHED_SIZE, MMU_INITIAL_MAP_STRONGLY_ORDERED

    orr     r8, r4, #MMU_TTBRx_FLAGS            /* r8 = r4 and set cacheable attributes on translation walk */
    ldr     r4, =g_mmuJumpPageTable             /* r4: jump pagetable vaddr */
    add     r4, r4, r11
    ldr     r4, [r4]
    add     r4, r4, r11                         /* r4: jump pagetable paddr */
    bl      page_table_clear

    /*
     * build 1M section mapping, in order to jump va during turning on mmu:
     * pa == pa, va == pa.
     * The table is indexed with (address >> 20) << 2 so that the store
     * always hits a word-aligned L1 slot, whatever the low 20 bits of the
     * address are.
     */
    mov     r6, pc
    lsr     r6, r6, #20                         /* r6: pa l1 index */
    ldr     r10, =MMU_DESCRIPTOR_KERNEL_L1_PTE_FLAGS
    add     r12, r10, r6, lsl #20               /* r12: pa |flags */
    str     r12, [r4, r6, lsl #2]               /* jumpTable[paIndex] = pt entry */
    rsb     r7, r11, r6, lsl #20                /* r7: va */
    lsr     r7, r7, #20                         /* r7: va l1 index */
    str     r12, [r4, r7, lsl #2]               /* jumpTable[vaIndex] = pt entry */

    bl      mmu_setup                           /* set up the mmu */

    /* get cpuid and keep it in r11 */
    mrc     p15, 0, r11, c0, c0, 5
    and     r11, r11, #MPIDR_CPUID_MASK
    cmp     r11, #0
    bne     excstatck_loop_done

excstatck_loop:
    /* clear out the interrupt and exception stack and set magic num to check the overflow */
    ldr     r0, =__undef_stack
    ldr     r1, =__exc_stack_top
    bl      stack_init

    STACK_MAGIC_SET __undef_stack, #OS_EXC_UNDEF_STACK_SIZE, OS_STACK_MAGIC_WORD
    STACK_MAGIC_SET __abt_stack, #OS_EXC_ABT_STACK_SIZE, OS_STACK_MAGIC_WORD
    STACK_MAGIC_SET __irq_stack, #OS_EXC_IRQ_STACK_SIZE, OS_STACK_MAGIC_WORD
    STACK_MAGIC_SET __fiq_stack, #OS_EXC_FIQ_STACK_SIZE, OS_STACK_MAGIC_WORD
    STACK_MAGIC_SET __svc_stack, #OS_EXC_SVC_STACK_SIZE, OS_STACK_MAGIC_WORD
    STACK_MAGIC_SET __exc_stack, #OS_EXC_STACK_SIZE, OS_STACK_MAGIC_WORD

excstatck_loop_done:
warm_reset:
    /* initialize interrupt/exception environments */
    mov    r0, #(CPSR_IRQ_DISABLE |CPSR_FIQ_DISABLE|CPSR_IRQ_MODE)
    msr    cpsr, r0
    EXC_SP_SET __irq_stack_top, #OS_EXC_IRQ_STACK_SIZE

    mov    r0, #(CPSR_IRQ_DISABLE|CPSR_FIQ_DISABLE|CPSR_UNDEF_MODE)
    msr    cpsr, r0
    EXC_SP_SET __undef_stack_top, #OS_EXC_UNDEF_STACK_SIZE

    mov    r0, #(CPSR_IRQ_DISABLE|CPSR_FIQ_DISABLE|CPSR_ABT_MODE)
    msr    cpsr, r0
    EXC_SP_SET __abt_stack_top, #OS_EXC_ABT_STACK_SIZE

    mov    r0, #(CPSR_IRQ_DISABLE|CPSR_FIQ_DISABLE|CPSR_FIQ_MODE)
    msr    cpsr, r0
    EXC_SP_SET __fiq_stack_top, #OS_EXC_FIQ_STACK_SIZE

    /* initialize CPSR (machine state register) */
    mov    r0, #(CPSR_IRQ_DISABLE|CPSR_FIQ_DISABLE|CPSR_SVC_MODE)
    msr    cpsr, r0

    /* Note: some functions in LIBGCC1 will cause a "restore from SPSR"!! */
    msr    spsr, r0

    /* set svc stack, every cpu has OS_EXC_SVC_STACK_SIZE stack */
    ldr    r0, =__svc_stack_top
    mov    r2, #OS_EXC_SVC_STACK_SIZE
    mul    r2, r2, r11
    sub    r0, r0, r2
    mov    sp, r0

    /* enable fpu+neon */
    MRC    p15, 0, r0, c1, c1, 2
    ORR    r0, r0, #0xC00
    BIC    r0, r0, #0xC000
    MCR    p15, 0, r0, c1, c1, 2

    LDR    r0, =(0xF << 20)
    MCR    p15, 0, r0, c1, c0, 2

    MOV    r3, #0x40000000
    VMSR   FPEXC, r3

    /* point the vector base register (c12, c0, 0) at the linked vector table */
    LDR    r0, =__exception_handlers
    MCR    p15, 0, r0, c12, c0, 0

    cmp    r11, #0
    bne    cpu_start

clear_bss:
    ldr    r1, =__bss_start
    ldr    r2, =__bss_end
    mov    r0, #0

bss_loop:
    cmp    r1, r2
    strlo  r0, [r1], #4
    blo    bss_loop

#if defined(LOSCFG_CC_STACKPROTECTOR_ALL) || \
    defined(LOSCFG_CC_STACKPROTECTOR_STRONG) || \
    defined(LOSCFG_CC_STACKPROTECTOR)
    bl     __stack_chk_guard_setup
#endif

#ifdef LOSCFG_GDB_DEBUG
    /* GDB_START - generate a compiled_break,This function will get GDB stubs started, with a proper environment */
    bl     GDB_START
    .word  0xe7ffdeff
#endif

    bl     main

_start_hang:
    b      _start_hang

/*
 * Turn on the MMU using the temporary jump page table, switch to the
 * linked (virtual) address, then install the final page table.
 * In:    r4  = jump page table physical address
 *        r8  = final TTBR0 value (page table physical address | flags)
 *        r11 = pa - va
 *        lr  = physical return address
 * Clobb: r12; lr is rewritten to the matching virtual address
 */
mmu_setup:
    mov     r12, #0
    mcr     p15, 0, r12, c8, c7, 0              /* Set c8 to control the TLB and set the mapping to invalid */
    isb

    mcr     p15, 0, r12, c2, c0, 2              /* Initialize the c2 register */
    isb

    orr     r12, r4, #MMU_TTBRx_FLAGS
    mcr     p15, 0, r12, c2, c0, 0              /* Set attributes and set temp page table */
    isb

    mov     r12, #0x7                           /* 0b0111 */
    mcr     p15, 0, r12, c3, c0, 0              /* Set DACR with 0b0111, client and manager domain */
    isb

    mrc     p15, 0, r12, c1, c0, 0
    bic     r12, #(1 << 29 | 1 << 28)
    orr     r12, #(1 << 0)
    bic     r12, #(1 << 1)
    orr     r12, #(1 << 2)
    orr     r12, #(1 << 12)
    mcr     p15, 0, r12, c1, c0, 0              /* Set SCTLR with r12: Turn on the MMU, I/D cache Disable TRE/AFE */
    isb

    ldr     pc,  =1f                            /* Convert to VA */
1:
    mcr     p15, 0, r8, c2, c0, 0               /* Go to the base address saved in C2: Jump to the page table */
    isb

    mov     r12, #0
    mcr     p15, 0, r12, c8, c7, 0
    isb

    sub     lr,  r11                            /* adjust lr with delta of physical address and virtual address */
    bx      lr

    .code  32

    .global reset_platform
    .type   reset_platform,function
reset_platform:
#ifdef A7SEM_HAL_ROM_MONITOR
    /* initialize CPSR (machine state register) */
    mov    r0, #(CPSR_IRQ_DISABLE|CPSR_FIQ_DISABLE|CPSR_SVC_MODE)
    msr    cpsr, r0
    b      warm_reset
#else
    mov    r0, #0
    mov    pc, r0   // Jump to reset vector
#endif
cpu_start:
    bl     secondary_cpu_start
    b      .
/*
 * set sp for current cpu
 * r1 is stack bottom, r0 is stack size, r11 hold cpu id
 * Computes sp = r1 - r0 * r11. Clobbers r2 and r3.
 */
sp_set:
    mul     r3, r0, r11
    sub     r2, r1, r3
    mov     sp, r2
    bx      lr          /* set sp */

/*
 * Zero the 0x1000 (4096) word entries of a page table.
 * r4: page table base address
 * r5 and r6 will be used as variable
 */
page_table_clear:
    mov     r5, #0
    mov     r6, #0
0:
    str     r5, [r4, r6, lsl #2]
    add     r6, #1
    cmp     r6, #0x1000     /* r6 < 4096 */
    blt     0b
    bx      lr

/*
 * Fill consecutive 1 MB section entries of the page table.
 * r4: page table base address
 * r6: physical address
 * r7: virtual address
 * r8: sizes
 * r10: flags
 * r9 and r12 will be used as variable
 * r6, r7 and r8 are consumed (shifted and counted).
 */
page_table_build:
    mov     r9, r6
    bfc     r9, #20, #12                        /* r9: pa % MB */
    add     r8, r8, r9
    add     r8, r8, #(1 << 20)
    sub     r8, r8, #1
    lsr     r6, #20                             /* r6 = physical address / MB */
    lsr     r7, #20                             /* r7 = virtual address / MB */
    movs    r8, r8, lsr #20                     /* r8 = roundup(size, MB) */
    bxeq    lr                                  /* nothing to map: the loop below would otherwise wrap around */

page_table_build_loop:
    orr     r12, r10, r6, lsl #20               /* r12: flags | physAddr */
    str     r12, [r4, r7, lsl #2]               /* gPgTable[l1Index] = physAddr | flags */
    add     r6, #1                              /* physAddr+ */
    add     r7, #1                              /* l1Index++ */
    subs    r8, #1                              /* sizes-- */
    bne     page_table_build_loop
    bx      lr

/*
 * init stack to initial value
 * r0 is stack mem start, r1 is stack mem end
 * Always writes at least one 32-byte chunk; r2 and r3 are clobbered and
 * r0 is advanced past the filled area.
 */
stack_init:
    ldr     r2, =OS_STACK_INIT
    ldr     r3, =OS_STACK_INIT
    /* Main loop sets 32 bytes at a time. */
stack_init_loop:
    .irp    offset, #0, #8, #16, #24
    strd    r2, r3, [r0, \offset]
    .endr
    add     r0, #32
    cmp     r0, r1
    blo     stack_init_loop                     /* addresses are unsigned: use lo, not lt */
    bx      lr

/*
 * Holds its own link-time address. Code running at the load address reads
 * this word through a pc-relative reference to work out the difference
 * between where the image runs and where it was linked.
 */
pa_va_offset:
    .word   .

/*
 * set magic num to stack top for all cpu
 * r0 is stack top, r1 is stack size, r2 is magic num
 * Stores r2 at r0, r0 + r1, ... once per core (CORE_NUM stores).
 * Clobbers r0 and r3.
 */
excstack_magic:
    mov     r3, #0
excstack_magic_loop:
    str     r2, [r0]
    add     r0, r0, r1
    add     r3, r3, #1
    cmp     r3, #CORE_NUM
    blt     excstack_magic_loop
    bx      lr

/*
 * 0xe51ff004 = "ldr  pc, [pc, #-4]"
 * next addr value will be the real booting addr
 * Writes that instruction to address 0 and SYS_MEM_BASE to address 4.
 * Clobbers r0 and r1.
 */
_bootaddr_setup:
    mov     r0, #0
    ldr     r1, =0xe51ff004
    str     r1, [r0]

    add     r0, r0, #4
    ldr     r1, =SYS_MEM_BASE
    str     r1, [r0]

    dsb
    isb

    bx      lr

init_done:
    .long  0xDEADB00B

    .code  32
    .data

    /* align first so that the label binds to the word, not to padding */
    .balign 4
init_flag:
    .long   0

    /*
    * Temporary interrupt stack
    */
    .section ".int_stack", "wa", %nobits
    .align  3

__undef_stack:
    .space OS_EXC_UNDEF_STACK_SIZE * CORE_NUM
__undef_stack_top:

__abt_stack:
    .space OS_EXC_ABT_STACK_SIZE * CORE_NUM
__abt_stack_top:

__irq_stack:
    .space OS_EXC_IRQ_STACK_SIZE * CORE_NUM
__irq_stack_top:

__fiq_stack:
    .space OS_EXC_FIQ_STACK_SIZE * CORE_NUM
__fiq_stack_top:

__svc_stack:
    .space OS_EXC_SVC_STACK_SIZE * CORE_NUM
__svc_stack_top:

__exc_stack:
    .space OS_EXC_STACK_SIZE * CORE_NUM
__exc_stack_top:
看不太明白的地方,是复位处理(reset_vector)中计算虚拟地址和物理地址之差的那部分实现,这里单独贴出来:
/*
 * r11: delta of physical address and virtual address.
 * The MMU is still off here, so pc holds the physical address the CPU is
 * really fetching from. adr is pc-relative: the assembler encodes the fixed
 * distance between this instruction and pa_va_offset, so at run time r11
 * receives the physical address of pa_va_offset.
 * The word stored at pa_va_offset is ".word .", i.e. the label's own
 * link-time address, which is the virtual address the image is linked
 * to run at.
 */
    adr     r11, pa_va_offset           /* r11: physical (run-time) address of pa_va_offset */
    ldr     r0, [r11]                   /* r0: link-time (virtual) address of pa_va_offset */
    sub     r11, r11, r0                /* r11 = physical address - virtual (link) address */
这里主要是之前对 adr 这一句汇编理解得不够深入。详细了解之后才知道,这是一条伪指令,汇编器汇编时会把它汇编成一条以 pc 为基址的 add 或 sub 指令(目标标号在当前指令之后用 add,在当前指令之前用 sub;本例中 pa_va_offset 位于该指令之后,所以是 add)。具体原因是:在程序链接的过程中,pa_va_offset 的地址是固定的,这条伪指令自身的地址也是确定的,所以它俩之间的偏移就是确定的。这里要记住:是后面那个符号的地址与 adr r11, pa_va_offset 这条语句之间的相对偏移是固定的,所以汇编器在汇编这条语句的时候,实际上就已经知道两者的地址差 offset 了。并且这条伪指令在目的寄存器不为 pc 时,执行的结果就是 目的寄存器 = pc + offset(ARM 状态下读到的 pc 是当前指令地址加 8,这一点汇编器在计算 offset 时已经考虑进去了),所以这里 r11 保存的就是 pa_va_offset 在程序运行时刻的物理地址。这一点比较难理解,但是从硬件的行为去考虑就很容易理解了:硬件启动之后从哪个物理地址开始运行,在具体的硬件模式确定之后也就确定了——这一点最后反映在程序中,就是 PC 寄存器里一定是真实的物理地址,所以前面红色字体的内容就能理解了。然后后面两句就得到了虚拟地址和物理地址的差。这里确实更加费解,但是如果把这句话换个说法就好理解多了:实际得到的是链接地址和物理地址的差。具体还要看 pa_va_offset 标识符下的实现,下面贴出来:
/* A 32-bit word whose value is the link-time address of this very label. */
pa_va_offset:
    .word   pa_va_offset
这简单的一句话其实就是:在当前符号所链接到的地址处放置一个 32 位的值,这个值就是程序链接到这里时的链接地址("." 表示当前位置计数器),熟悉链接脚本的话很容易想到。所以这里是在这个符号的地址处存放了这个符号自己的链接地址——有点绕,就是在一个链接地址上存放了这个链接地址本身的值。于是上面的代码就得到了程序的链接地址和实际运行的物理地址之差。之所以注释里说是虚拟地址和物理地址的差,是因为 OS 程序在链接时指定的链接地址,实际上就是开启 MMU 之后的虚拟地址,而镜像将来是要拷贝到 DRAM 中运行的。所以这里的虚拟地址实际上就是链接地址,而且必须如此,否则系统是无法启动的。最后这里还有一个知识点:链接地址和运行地址不同,为什么程序还能正常运行呢?这是因为这部分代码是位置无关(PIC)的,即不关心加载(load)地址;也可以在编译时指定把部分代码编译为位置无关的代码。相关内容也可以查看前面写的博客《编译过程中的链接地址和实际运行地址》。