浅析基于ARM的Linux下的系统调用的实现

在Linux下系统调用是用软中断实现的,下面以一个简单的open例子简要分析一下应用层的open是如何调用到内核中的sys_open的。

t8.c

   1:  #include <stdio.h>
   2:  #include <sys/types.h>
   3:  #include <sys/stat.h>
   4:  #include <fcntl.h>
   5:   
   6:  int main(int argc, const char *argv[])
   7:  {
   8:      int fd;
   9:   
  10:      fd = open(".", O_RDWR);
  11:   
  12:      close(fd);
  13:      return 0;
  14:  }

这里需要注意的是:open是C库提供的库函数,并不是系统调用,系统调用是在内核空间实现的,应用空间无法直接调用。在《Linux内核设计与实现》中说:要访问系统调用(在Linux中常称作syscall),通常通过C库中定义的函数调用来进行。

将t8.c进行静态编译,然后反汇编,看一下是如何调用open的?

   1:  arm-linux-gcc t8.c --static
   2:  arm-linux-objdump -D a.out >a.dis

下面我们截取a.dis中的一部分进行说明:

   1:  ......
   2:  00008228 <main>:
   3:      8228:   e92d4800    push    {fp, lr}
   4:      822c:   e28db004    add fp, sp, #4  ; 0x4
   5:      8230:   e24dd010    sub sp, sp, #16 ; 0x10
   6:      8234:   e50b0010    str r0, [fp, #-16]
   7:      8238:   e50b1014    str r1, [fp, #-20]
   8:      823c:   e59f0028    ldr r0, [pc, #40]   ; 826c <main+0x44>
   9:      8240:   e3a01002    mov r1, #2  ; 0x2   ;  #define  O_RDWR  00000002 
  10:      8244:   eb002e7d    bl  13c40 <__libc_open>
  11:      8248:   e1a03000    mov r3, r0
  12:      824c:   e50b3008    str r3, [fp, #-8]
  13:      8250:   e51b0008    ldr r0, [fp, #-8]
  14:      8254:   eb002e9d    bl  13cd0 <__libc_close>
  15:      8258:   e3a03000    mov r3, #0  ; 0x0
  16:      825c:   e1a00003    mov r0, r3
  17:      8260:   e24bd004    sub sp, fp, #4  ; 0x4
  18:      8264:   e8bd4800    pop {fp, lr}
  19:      8268:   e12fff1e    bx  lr
  20:      826c:   00064b8c    .word   0x00064b8c
  21:  ......
  22:  00013c40 <__libc_open>:
  23:     13c40:   e51fc028    ldr ip, [pc, #-40]  ; 13c20 <___fxstat64+0x50>
  24:     13c44:   e79fc00c    ldr ip, [pc, ip]
  25:     13c48:   e33c0000    teq ip, #0  ; 0x0
  26:     13c4c:   1a000006    bne 13c6c <__libc_open+0x2c>
  27:     13c50:   e1a0c007    mov ip, r7
  28:     13c54:   e3a07005    mov r7, #5  ; 0x5   

  #在arch/arm/include/asm/unistd.h中:#define __NR_open  (__NR_SYSCALL_BASE+5)
                    其中,在EABI下__NR_SYSCALL_BASE是0(__NR_OABI_SYSCALL_BASE是0x900000,仅用于OABI)

  29:     

13c58: ef000000 svc 0x00000000 #产生软中断

  30:     13c5c:   e1a0700c    mov r7, ip
  31:     13c60:   e3700a01    cmn r0, #4096   ; 0x1000
  32:     13c64:   312fff1e    bxcc    lr
  33:     13c68:   ea0008d4    b   15fc0 <__syscall_error>
  34:  ......

通过上面的代码注释,可以看到,系统调用sys_open的系统调用号是5,将系统调用号存放到寄存器R7当中,然后应用程序通过svc 0x00000000产生软中断,陷入内核空间。

也许会好奇,ARM软中断不是用SWI吗,这里怎么变成了SVC了,请看下面一段话,是从ARM官网copy的:

SVC

超级用户调用。
语法

SVC{cond} #immed

其中:

cond

    是一个可选的条件代码(请参阅条件执行)。

immed

    是一个表达式,其取值为以下范围内的一个整数:

        在 ARM 指令中为 0 到 2^24–1(24 位值)

        在 16 位 Thumb 指令中为 0-255(8 位值)。

用法

SVC 指令会引发一个异常。 这意味着处理器模式会更改为超级用户模式,CPSR 会保存到超级用户模式 SPSR,并且执行会跳转到 SVC 向量(请参阅《开发指南》中的第 6 章 处理处理器异常)。

处理器会忽略 immed。 但异常处理程序会获取它,借以确定所请求的服务。

Note

作为 ARM 汇编语言开发成果的一部分,SWI 指令已重命名为 SVC。 在此版本的 RVCT 中,SWI 指令反汇编为 SVC,并提供注释以指明这是以前的 SWI。

条件标记

此指令不更改标记。

体系结构

此 ARM 指令可用于所有版本的 ARM 体系结构。

在基于ARM的Linux中,异常向量表已经被放置在了0xFFFF0000这个位置。这个过程的完成:

start_kernel ---> setup_arch ---> early_trap_init

   1:  void __init early_trap_init(void)
   2:  {
   3:      unsigned long vectors = CONFIG_VECTORS_BASE;  // 就是0xFFFF0000
   4:      extern char __stubs_start[], __stubs_end[];
   5:      extern char __vectors_start[], __vectors_end[];
   6:      extern char __kuser_helper_start[], __kuser_helper_end[];
   7:      int kuser_sz = __kuser_helper_end - __kuser_helper_start;
   8:   
   9:      /*
  10:       * Copy the vectors, stubs and kuser helpers (in entry-armv.S)
  11:       * into the vector page, mapped at 0xffff0000, and ensure these
  12:       * are visible to the instruction stream.
  13:       */
  14:      memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start);
  15:      memcpy((void *)vectors + 0x200, __stubs_start, __stubs_end - __stubs_start);
  16:      memcpy((void *)vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);
  17:   
  18:      /*
  19:       * Copy signal return handlers into the vector page, and
  20:       * set sigreturn to be a pointer to these.
  21:       */
  22:      memcpy((void *)KERN_SIGRETURN_CODE, sigreturn_codes,
  23:             sizeof(sigreturn_codes));
  24:   
  25:      flush_icache_range(vectors, vectors + PAGE_SIZE);
  26:      modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
  27:  }

关于上面这个函数的详细解释,参见:

http://www.cnblogs.com/pengdonglin137/p/3603549.html

把异常中断向量表的位置设置为0xffff0000的话,需要修改协处理器CP15的寄存器C1的第13位,将其设置为1。以TQ2440提供的内核2.6.30.4为例看一下:

arch/arm/kernel/head.S

   1:      adr    lr, __enable_mmu        @ return (PIC) address
   2:      add    pc, r10, #PROCINFO_INITFUNC

其中,PROCINFO_INITFUNC的值是16,r10的值是__arm920_proc_info的地址:

   1:  __arm920_proc_info:
   2:      .long    0x41009200
   3:      .long    0xff00fff0
   4:      .long   PMD_TYPE_SECT | \
   5:          PMD_SECT_BUFFERABLE | \
   6:          PMD_SECT_CACHEABLE | \
   7:          PMD_BIT4 | \
   8:          PMD_SECT_AP_WRITE | \
   9:          PMD_SECT_AP_READ
  10:      .long   PMD_TYPE_SECT | \
  11:          PMD_BIT4 | \
  12:          PMD_SECT_AP_WRITE | \
  13:          PMD_SECT_AP_READ
  14:      b    __arm920_setup
  15:      .long    cpu_arch_name
  16:      .long    cpu_elf_name
  17:       ......
  18:      .size    __arm920_proc_info, . - __arm920_proc_info

看一下__arm920_setup的实现(proc-arm920.S (arch\arm\mm)):

   1:      .type    __arm920_setup, #function
   2:  __arm920_setup:
   3:      mov    r0, #0
   4:      mcr    p15, 0, r0, c7, c7        @ invalidate I,D caches on v4
   5:      mcr    p15, 0, r0, c7, c10, 4        @ drain write buffer on v4
   6:  #ifdef CONFIG_MMU
   7:      mcr    p15, 0, r0, c8, c7        @ invalidate I,D TLBs on v4
   8:  #endif
   9:      adr    r5, arm920_crval
  10:      ldmia    r5, {r5, r6}            @ 参看一下下面的arm920_crval的实现,本句话执行完后r5和r6分别为:0x3f3f和0x3135
  11:      mrc    p15, 0, r0, c1, c0        @ get control register v4   获取协处理器p15的寄存器c1
  12:      

bic r0, r0, r5

  13:      

orr r0, r0, r6 @ 我们只关注第13位,这里将r0的第13位设置为了1

  14:      

mov pc, lr

  15:      .size    __arm920_setup, . - __arm920_setup
  16:   
  17:      /*
  18:       *  R
  19:       * .RVI ZFRS BLDP WCAM
  20:       * ..11 0001 ..11 0101
  21:       * 
  22:       */
  23:      .type    arm920_crval, #object
  24:  arm920_crval:
  25:      crval    clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130

再看一下crval的实现(proc-macros.S (arch\arm\mm)):

   1:      .macro    crval, clear, mmuset, ucset
   2:  #ifdef CONFIG_MMU
   3:      .word    \clear
   4:      .word    \mmuset
   5:  #else
   6:      .word    \clear
   7:      .word    \ucset
   8:  #endif
   9:      .endm

在__arm920_setup中执行完 mov pc, lr后,便跳入了下面的语句:

   1:  __enable_mmu:
   2:  #ifdef CONFIG_ALIGNMENT_TRAP
   3:      orr    r0, r0, #CR_A
   4:  #else
   5:      bic    r0, r0, #CR_A
   6:  #endif
   7:  #ifdef CONFIG_CPU_DCACHE_DISABLE
   8:      bic    r0, r0, #CR_C
   9:  #endif
  10:  #ifdef CONFIG_CPU_BPREDICT_DISABLE
  11:      bic    r0, r0, #CR_Z
  12:  #endif
  13:  #ifdef CONFIG_CPU_ICACHE_DISABLE
  14:      bic    r0, r0, #CR_I
  15:  #endif
  16:      mov    r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
  17:                domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
  18:                domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
  19:                domain_val(DOMAIN_IO, DOMAIN_CLIENT))
  20:      mcr    p15, 0, r5, c3, c0, 0        @ load domain access register
  21:      mcr    p15, 0, r4, c2, c0, 0        @ load page table pointer
  22:      b    __turn_mmu_on
  23:  ENDPROC(__enable_mmu)

看一下__turn_mmu_on的实现(head.S (arch\arm\kernel)):

   1:      .align    5
   2:  __turn_mmu_on:
   3:      mov    r0, r0
   4:      mcr    p15, 0, r0, c1, c0, 0        @ write control reg
   5:      mrc    p15, 0, r3, c0, c0, 0        @ read id reg
   6:      mov    r3, r3
   7:      mov    r3, r3
   8:      mov    pc, r13
   9:  ENDPROC(__turn_mmu_on)

在__turn_mmu_on中,将寄存器r0的值写到了cp15协处理器的寄存器C1中。到这里便完成了将异常中断向量表的位置放到了0xffff0000.

说完异常向量表的位置,接下来看看软中断的实现。

ARM提供的中断类型:

浅析基于ARM的Linux下的系统调用的实现

ARM的异常处理模型:

浅析基于ARM的Linux下的系统调用的实现

entry-armv.S (arch\arm\kernel)

   1:  .LCvswi:
   2:     

.word vector_swi

   3:   
   4:      .globl    __stubs_end
   5:  __stubs_end:
   6:   
   7:      .equ    stubs_offset, __vectors_start + 0x200 - __stubs_start
   8:   
   9:      .globl    __vectors_start
  10:  __vectors_start:
  11:      swi    SYS_ERROR0
  12:      b    vector_und + stubs_offset
  13:      ldr    pc, .LCvswi + stubs_offset     @发生软中断后先跳到这里
  14:      b    vector_pabt + stubs_offset
  15:      b    vector_dabt + stubs_offset
  16:      b    vector_addrexcptn + stubs_offset
  17:      b    vector_irq + stubs_offset
  18:      b    vector_fiq + stubs_offset
  19:   
  20:      .globl    __vectors_end
  21:  __vectors_end:
  22:   
  23:      .data
  24:   
  25:      .globl    cr_alignment
  26:      .globl    cr_no_alignment
  27:  cr_alignment:
  28:      .space    4
  29:  cr_no_alignment:
  30:      .space    4

接下来看一下vector_swi的实现,根据实际的宏定义进行了简化

   1:  ENTRY(vector_swi)
   2:      sub    sp, sp, #S_FRAME_SIZE
   3:      stmia    sp, {r0 - r12}            @ Calling r0 - r12
   4:      add    r8, sp, #S_PC
   5:      stmdb    r8, {sp, lr}^            @ Calling sp, lr
   6:      mrs    r8, spsr            @ called from non-FIQ mode, so ok.
   7:      str    lr, [sp, #S_PC]            @ Save calling PC
   8:      str    r8, [sp, #S_PSR]        @ Save CPSR
   9:      str    r0, [sp, #S_OLD_R0]        @ Save OLD_R0
  10:      zero_fp
  11:   
  12:      /*
  13:       * Get the system call number.
  14:       */
  15:   
  16:      /*
  17:       * If we have CONFIG_OABI_COMPAT then we need to look at the swi
  18:       * value to determine if it is an EABI or an old ABI call.
  19:       */
  20:      ldr    r10, [lr, #-4]   

            @ get SWI instruction  r10中存放的就是引起软中断的那条指令的机器码
              发生软中断的时候,系统自动将PC-4存放到了lr寄存器,由于是三级流水,
              并且是ARM状态,还需要减4才能得到发生软中断的那条指令的机器码所在的地址


  21:    A710(    and    ip, r10, #0x0f000000        @ check for SWI        )
  22:    A710(    teq    ip, #0x0f000000                        )
  23:    A710(    bne    .Larm710bug                        )
  24:   
  25:      ldr    ip, __cr_alignment
  26:      ldr    ip, [ip]
  27:      mcr    p15, 0, ip, c1, c0        @ update control register
  28:      enable_irq   @进入SVC异常时硬件会自动屏蔽当前CPU的IRQ(置位CPSR的I位),这里重新打开中断
  29:   
  30:      

get_thread_info tsk @ 参看下面的介绍

31: adr tbl, sys_call_table

@ load syscall table pointer 此时tbl(r8)中存放的就是sys_call_table的起始地址

  32:      ldr    ip, [tsk, #TI_FLAGS]        @ check for syscall tracing
  33:   
  34:      /*
  35:       * If the swi argument is zero, this is an EABI call and we do nothing.
  36:       *
  37:       * If this is an old ABI call, get the syscall number into scno and
  38:       * get the old ABI syscall table address.
  39:       */
  40:      bics    r10, r10, #0xff000000
  41:      eorne    scno, r10, #__NR_OABI_SYSCALL_BASE
  42:      ldr

ne

    tbl, =sys_oabi_call_table
  43:   
  44:      stmdb    sp!, {r4, r5}            @ push fifth and sixth args
  45:      tst    ip, #_TIF_SYSCALL_TRACE        @ are we tracing syscalls?
  46:      bne    __sys_trace
  47:   
  48:      cmp    scno, #NR_syscalls        @ check upper syscall limit
  49:      adr    lr, ret_fast_syscall        @ return address
  50:      

ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine

  51:   
  52:      add    r1, sp, #S_OFF
  53:  2:    mov    why, #0                @ no longer a real syscall
  54:      cmp    scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
  55:      eor    r0, scno, #__NR_SYSCALL_BASE    @ put OS number back
  56:      bcs    arm_syscall
  57:      b    sys_ni_syscall            @ not private func
  58:  ENDPROC(vector_swi)


上面的vector_swi位于entry-common.S (arch\arm\kernel)。下面是entry-header.S (arch\arm\kernel)的部分内容:

   1:  /*
   2:   * These are the registers used in the syscall handler, and allow us to
   3:   * have in theory up to 7 arguments to a function - r0 to r6.
   4:   *
   5:   * r7 is reserved for the system call number for thumb mode.
   6:   *
   7:   * Note that tbl == why is intentional.
   8:   *
   9:   * We must set at least "tsk" and "why" when calling ret_with_reschedule.
  10:   */
  11:  scno    .req    r7        @ syscall number
  12:  tbl    .req    r8        @ syscall table pointer
  13:  why    .req    r8        @ Linux syscall (!= 0)
  14:  tsk    .req    r9        @ current thread_info

.req 是汇编伪指令,以 scno .req r7 为例,表示scno是寄存器r7的别名。

  • get_thread_info tsk

其中,tsk是寄存器r9的别名,get_thread_info是一个宏定义,如下:

   1:      .macro    get_thread_info, rd
   2:      mov    \rd, sp, lsr #13
   3:      mov    \rd, \rd, lsl #13
   4:      .endm

即:将sp进行8KB对齐后的值赋给寄存器r9,什么意思?

这个就涉及到Linux的内核栈了。Linux为每个进程都分配了一个8KB的内核栈,在内核栈的尾端(即这8KB空间的最低地址处)存放有关于这个进程的struct thread_info结构:

   1:  struct thread_info {
   2:      unsigned long        flags;        /* low level flags */
   3:      int            preempt_count;    /* 0 => preemptable, <0 => bug */
   4:      mm_segment_t        addr_limit;    /* address limit */
   5:      struct task_struct    *task;        /* main task structure */
   6:      struct exec_domain    *exec_domain;    /* execution domain */
   7:      __u32            cpu;        /* cpu */
   8:      __u32            cpu_domain;    /* cpu domain */
   9:      struct cpu_context_save    cpu_context;    /* cpu context */
  10:      __u32            syscall;    /* syscall number */
  11:      __u8            used_cp[16];    /* thread used copro */
  12:      unsigned long        tp_value;
  13:      struct crunch_state    crunchstate;
  14:      union fp_state        fpstate __attribute__((aligned(8)));
  15:      union vfp_state        vfpstate;
  16:  #ifdef CONFIG_ARM_THUMBEE
  17:      unsigned long        thumbee_state;    /* ThumbEE Handler Base register */
  18:  #endif
  19:      struct restart_block    restart_block;
  20:  };

通过上面的操作,寄存器r9中就是这个进程的thread_info结构的起始地址。

  • sys_call_table

entry-common.S (arch\arm\kernel)

   1:      .type    sys_call_table, #object
   2:  ENTRY(sys_call_table)
   3:  #include "calls.S"
   4:  #undef ABI
   5:  #undef OBSOLETE


其中,calls.S的内容如下:

   1:  /*
   2:   *  linux/arch/arm/kernel/calls.S
   3:   *
   4:   *  Copyright (C) 1995-2005 Russell King
   5:   *
   6:   * This program is free software; you can redistribute it and/or modify
   7:   * it under the terms of the GNU General Public License version 2 as
   8:   * published by the Free Software Foundation.
   9:   *
  10:   *  This file is included thrice in entry-common.S
  11:   */
  12:  /* 0 */        CALL(sys_restart_syscall)
  13:          CALL(sys_exit)
  14:          CALL(sys_fork_wrapper)
  15:          CALL(sys_read)
  16:          CALL(sys_write)
  17:  /* 5 */        CALL(sys_open)
  18:          CALL(sys_close)
  19:          CALL(sys_ni_syscall)        /* was sys_waitpid */
  20:          CALL(sys_creat)
  21:          CALL(sys_link)
  22:  /* 10 */    CALL(sys_unlink)
  23:          CALL(sys_execve_wrapper)
  24:          CALL(sys_chdir)
  25:          CALL(OBSOLETE(sys_time))    /* used by libc4 */
  26:          CALL(sys_mknod)
  27:  ......
  28:  /* 355 */    CALL(sys_signalfd4)
  29:          CALL(sys_eventfd2)
  30:          CALL(sys_epoll_create1)
  31:          CALL(sys_dup3)
  32:          CALL(sys_pipe2)
  33:  /* 360 */    CALL(sys_inotify_init1)
  34:          CALL(sys_preadv)
  35:          CALL(sys_pwritev)
  36:  #ifndef syscalls_counted
  37:  .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
  38:  #define syscalls_counted
  39:  #endif
  40:  .rept syscalls_padding
  41:          CALL(sys_ni_syscall)
  42:  .endr

关于这个部分的更多介绍参见:

http://www.cnblogs.com/pengdonglin137/p/3714981.html

  • bics    r10, r10, #0xff000000

执行这个操作的时候,r10中存放的是SWI instruction,在我们的例子中就是(a.dis):

浅析基于ARM的Linux下的系统调用的实现

即:r10 为 0xEF000000

显然,bics执行后r10的高8位被清除,结果为0,Z标志被置位,所以bics这条指令下面两条带ne条件的语句由于条件不成立,无法获得执行。对于OABI调用,这条指令的作用是从SWI指令的低24位中获得系统调用号。

可以参考这个手册,看一下svc执行的格式:

http://files.cnblogs.com/pengdonglin137/DUI0203IC_rvct_developer_guide.pdf

浅析基于ARM的Linux下的系统调用的实现

可以看到,[23:0]存放的就是svc指令后面的那个立即数,也即系统调用号。

不过需要注意的是:我们这里并没有这样做,我们的做法是(a.dis中可以看到):

浅析基于ARM的Linux下的系统调用的实现

使用的是svc 0,后面跟的并不是系统调用号,而是0,这里把系统调用号存放在了寄存器r7中(a.dis中):

浅析基于ARM的Linux下的系统调用的实现

可以看到,由于使用的sys_open系统调用,所以把它的系统调用号5存放到了寄存器r7当中

  • ldrcc    pc, [tbl, scno, lsl #2]        @ call sys_* routine

这里的scno就是寄存器r7的别名,它的值是sys_open的系统调用号5,由于在calls.S中每个系统调用表项占用4个字节,所以这里将scno的值乘以4然后再加上tbl,tbl是系统调用表sys_call_table的基地址。然后就跳入sys_open开始执行了。

asmlinkage long sys_open(const char __user *filename,
int flags, int mode);

那么sys_open在哪呢?在内核源码中直接搜索sys_open,无法搜到它的实现代码,实际上它是在fs/open.c中实现的:

   1:  SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
   2:  {
   3:      long ret;
   4:   
   5:      if (force_o_largefile())
   6:          flags |= O_LARGEFILE;
   7:   
   8:      ret = do_sys_open(AT_FDCWD, filename, flags, mode);
   9:      /* avoid REGPARM breakage on x86: */
  10:      asmlinkage_protect(3, ret, filename, flags, mode);
  11:      return ret;
  12:  }

其中SYSCALL_DEFINE3是一个宏:

syscalls.h (include\linux)

#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)

SYSCALL_DEFINEx也是一个宏:

syscalls.h (include\linux)

#define SYSCALL_DEFINEx(x, sname, ...)                \
__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)

__SYSCALL_DEFINEx仍然是个宏:

syscalls.h (include\linux)

#define __SYSCALL_DEFINEx(x, name, ...)                    \
asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))

所以展开后的结果就是:

asmlinkage long sys_open(__SC_DECL3(__VA_ARGS__))

其中,__SC_DECL3定义如下:

syscalls.h (include\linux)

   1:  #define __SC_DECL1(t1, a1)    t1 a1
   2:  #define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__)
   3:  #define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__)

所以最终的结果如下:

   1:  asmlinkage long sys_open(const char __user *filename, int flags, int mode)
   2:  {
   3:      long ret;
   4:   
   5:      if (force_o_largefile())
   6:          flags |= O_LARGEFILE;
   7:   
   8:      ret = do_sys_open(AT_FDCWD, filename, flags, mode);
   9:      /* avoid REGPARM breakage on x86: */
  10:      asmlinkage_protect(3, ret, filename, flags, mode);
  11:      return ret;
  12:   
  13:  }


关于sys_open本身的实现这里就不深入分析了。

接下来看一下返回。

  • adr    lr, ret_fast_syscall        @ return address

当sys_open中return后,便跳入ret_fast_syscall处开始执行:

   1:  /*
   2:   * This is the fast syscall return path.  We do as little as
   3:   * possible here, and this includes saving r0 back into the SVC
   4:   * stack.
   5:   */
   6:  ret_fast_syscall:
   7:   UNWIND(.fnstart    )
   8:   UNWIND(.cantunwind    )
   9:      disable_irq                @ disable interrupts
  10:      

ldr r1, [tsk, #TI_FLAGS] @将thread_info中的flags成员存放到r1中

  11:      tst    r1, #_TIF_WORK_MASK
  12:      bne    fast_work_pending  
  13:   
  14:      /* perform architecture specific actions before user return */
  15:      arch_ret_to_user r1, lr
  16:   
  17:      @ fast_restore_user_regs
  18:      ldr    r1, [sp, #S_OFF + S_PSR]    @ get calling cpsr
  19:      ldr    lr, [sp, #S_OFF + S_PC]!    @ get pc
  20:      msr    spsr_cxsf, r1            @ save in spsr_svc
  21:      ldmdb    sp, {r1 - lr}^            @ get calling r1 - lr
  22:      mov    r0, r0
  23:      add    sp, sp, #S_FRAME_SIZE - S_PC
  24:      movs    pc, lr                @ return & move spsr_svc into cpsr
  25:   UNWIND(.fnend        )
  26:   
  27:  /*
  28:   * Ok, we need to do extra processing, enter the slow path.
  29:   */
  30:  fast_work_pending:
  31:      str    r0, [sp, #S_R0+S_OFF]!        @ returned r0
  32:  work_pending:
  33:      tst    r1, #_TIF_NEED_RESCHED       @判断是否需要进行进程调度
  34:      bne    work_resched
  35:      tst    r1, #_TIF_SIGPENDING
  36:      beq    no_work_pending
  37:      mov    r0, sp                @ 'regs'
  38:      mov    r2, why                @ 'syscall'
  39:      bl    do_notify_resume
  40:      b    ret_slow_syscall        @ Check work again
  41:   
  42:  

work_resched:

  43:      bl    schedule
  44:  /*
  45:   * "slow" syscall return path.  "why" tells us if this was a real syscall.
  46:   */
  47:  ENTRY(ret_to_user)
  48:  ret_slow_syscall:
  49:      disable_irq                @ disable interrupts
  50:      ldr    r1, [tsk, #TI_FLAGS]
  51:      tst    r1, #_TIF_WORK_MASK
  52:      bne    work_pending
  53:  no_work_pending:
  54:      /* perform architecture specific actions before user return */
  55:      arch_ret_to_user r1, lr
  56:   
  57:      @ slow_restore_user_regs
  58:      ldr    r1, [sp, #S_PSR]        @ get calling cpsr
  59:      ldr    lr, [sp, #S_PC]!        @ get pc
  60:      msr    spsr_cxsf, r1            @ save in spsr_svc
  61:      ldmdb    sp, {r0 - lr}^            @ get calling r0 - lr
  62:      mov    r0, r0
  63:      add    sp, sp, #S_FRAME_SIZE - S_PC
  64:      movs    pc, lr                @ return & move spsr_svc into cpsr
  65:  ENDPROC(ret_to_user)

在返回的时候要看是否要进行进程调度。

浅析基于ARM的Linux下的系统调用的实现

先分析到这里。

上一篇:【HDOJ】2828 Lamp


下一篇:Python unittest appium