理解Linux内核之中断控制

乍一看下边的Linux内核代码,貌似L3389有bug,于是我就饶有兴趣地阅读了一下local_irq_save/local_irq_restore的源代码。

/* linux-4.14.12/mm/slab.c#3389 */

  /*
   * Excerpt of slab_alloc(); parts elided by the article are shown as "....".
   * The call to __do_cache_alloc() is bracketed by local_irq_save() and
   * local_irq_restore(), both operating on the same save_flags variable.
   */
  static __always_inline void *
slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
{
unsigned long save_flags; /* filled in by local_irq_save(), consumed by local_irq_restore() */
void *objp;
....
local_irq_save(save_flags); /* a macro: save_flags is named directly, no '&' is needed */
objp = __do_cache_alloc(cachep, flags);
local_irq_restore(save_flags);
....
return objp;
}

在L3380和L3389中, 如果local_irq_save()是一个函数,必然存在着bug, 因为需要把save_flags的变量地址传给local_irq_save()才对。

      unsigned long save_flags;
....
local_irq_save(save_flags);

L3389是不是该是这样才对啊?

      local_irq_save(&save_flags);

但是,local_irq_save()和local_irq_restore()不是函数,而是宏,这样就没有bug了。

1. local_irq_save()和local_irq_restore()的实现

/* linux-4.14.12/include/linux/irqflags.h#139 */

  #ifdef CONFIG_TRACE_IRQFLAGS
...
/*
 * Save the current interrupt state into flags via raw_local_irq_save(),
 * then tell the irq-flags tracer that hardirqs are now off.
 * Wrapped in do { } while (0) so the macro expands to a single statement.
 */
#define local_irq_save(flags) \
	do { \
		raw_local_irq_save(flags); \
		trace_hardirqs_off(); \
	} while (0)

/*
 * Restore the interrupt state held in flags.
 * When raw_irqs_disabled_flags(flags) is true the state is restored first
 * and the tracer is then told hardirqs are off; otherwise the tracer is
 * told hardirqs are on before the state is restored.
 */
#define local_irq_restore(flags) \
	do { \
		if (raw_irqs_disabled_flags(flags)) { \
			raw_local_irq_restore(flags); \
			trace_hardirqs_off(); \
		} else { \
			trace_hardirqs_on(); \
			raw_local_irq_restore(flags); \
		} \
	} while (0)
...
#else /* !CONFIG_TRACE_IRQFLAGS */
...
/*
 * !CONFIG_TRACE_IRQFLAGS variants: plain wrappers around
 * raw_local_irq_save()/raw_local_irq_restore() with no tracer calls.
 * do { } while (0) makes each expansion a single statement.
 */
#define local_irq_save(flags) \
	do { \
		raw_local_irq_save(flags); \
	} while (0)
#define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0)
...
#endif /* CONFIG_TRACE_IRQFLAGS */

为简单起见,我们只关注!CONFIG_TRACE_IRQFLAGS分支就好了,

  /*
   * The branch without irq-flags tracing: both macros simply forward to
   * their raw_ counterparts. do { } while (0) keeps each expansion a
   * single statement.
   */
#define local_irq_save(flags) \
	do { \
		raw_local_irq_save(flags); \
	} while (0)
#define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0)

于是, 我们可以认为, local_irq_save()/local_irq_restore()等同于:

#define local_irq_save(flags)    raw_local_irq_save(flags)
#define local_irq_restore(flags) raw_local_irq_restore(flags)

2. raw_local_irq_save()和raw_local_irq_restore()的实现

/* linux-4.14.12/include/linux/irqflags.h#78 */

  /*
   * Check that flags is an unsigned long, then store the value returned by
   * arch_local_irq_save() into it. Because this is a macro, flags is
   * assigned directly; callers pass the variable itself, not its address.
   */
#define raw_local_irq_save(flags) \
	do { \
		typecheck(unsigned long, flags); \
		flags = arch_local_irq_save(); \
	} while (0)

/*
 * Check that flags is an unsigned long, then pass it to
 * arch_local_irq_restore().
 */
#define raw_local_irq_restore(flags) \
	do { \
		typecheck(unsigned long, flags); \
		arch_local_irq_restore(flags); \
	} while (0)

关于宏typecheck()不做解释,因为很直观,就是保证flags的类型必须是unsigned long。于是,raw_local_irq_save()和raw_local_irq_restore()等同于:

#define raw_local_irq_save(flags)       flags = arch_local_irq_save()
#define raw_local_irq_restore(flags) arch_local_irq_restore(flags)

下面以x86为例说明arch_local_irq_save()和arch_local_irq_restore()这两个函数的实现。

3. arch_local_irq_save()和arch_local_irq_restore()这两个函数在x86上的实现

/* linux-4.14.12/arch/x86/include/asm/irqflags.h#70 */

  /* Return the current flags value as read by native_save_fl(). */
static inline notrace unsigned long arch_local_save_flags(void)
{
	unsigned long current_fl = native_save_fl();

	return current_fl;
}

/* Hand the given flags value to native_restore_fl(). */
static inline notrace void arch_local_irq_restore(unsigned long flags)
{
	native_restore_fl(flags);
}
...
/*
 * Capture the current flags, then disable local interrupts.
 * Returns the flags value as it was before interrupts were disabled.
 */
static inline notrace unsigned long arch_local_irq_save(void)
{
	unsigned long saved;

	/* The flags must be read before interrupts are turned off. */
	saved = arch_local_save_flags();
	arch_local_irq_disable();

	return saved;
}

函数arch_local_irq_save()在调用arch_local_save_flags()之后还做了一件事,那就是调用arch_local_irq_disable()把中断禁止掉。 接下来,我们首先看看native_save_fl()和native_restore_fl()的具体实现。

3.1 native_save_fl()的实现

/* linux-4.14.12/arch/x86/include/asm/irqflags.h#16 */

  /*
   * Return the current value of the CPU flags register.
   * "pushf" pushes the flags onto the stack and "pop %0" pops that value
   * into the local variable flags, which is then returned.
   */
  static inline unsigned long native_save_fl(void)
{
unsigned long flags;
/*
 * "=rm" is safe here, because "pop" adjusts the stack before
 * it evaluates its effective address -- this is part of the
 * documented behavior of the "pop" instruction.
 */
asm volatile("# __raw_save_flags\n\t"
"pushf ; pop %0"
: "=rm" (flags)
: /* no input */
: "memory"); return flags;
}

这是一段内嵌的汇编代码,后面写一个简单的demo再解释。

3.2 native_restore_fl()的实现

/* linux-4.14.12/arch/x86/include/asm/irqflags.h#34 */

  /*
   * Load the CPU flags register from the given value.
   * "push %0" pushes flags onto the stack and "popf" pops it into the
   * flags register. Both "memory" and "cc" are declared as clobbered.
   */
  static inline void native_restore_fl(unsigned long flags)
{
asm volatile("push %0 ; popf"
: /* no output */
:"g" (flags)
:"memory", "cc");
}

同样,这也是内嵌的汇编代码,后面写一个简单的demo再解释。

3.3 反汇编理解native_save_fl()和native_restore_fl()

  • foo.c
 /* foo.c - user-space demo of native_save_fl() and native_restore_fl(). */

/*
 * Return the current value of the flags register: "pushf" pushes it onto
 * the stack and "pop %0" stores it into the local variable flags.
 */
static inline unsigned long native_save_fl(void)
{
	unsigned long flags;

	/*
	 * "=rm" is safe here, because "pop" adjusts the stack before
	 * it evaluates its effective address -- this is part of the
	 * documented behavior of the "pop" instruction.
	 */
	asm volatile("# __raw_save_flags\n\t"
		     "pushf ; pop %0"
		     : "=rm" (flags)
		     : /* no input */
		     : "memory");

	return flags;
}

/*
 * Load the flags register from the given value: "push %0" pushes flags
 * onto the stack and "popf" pops it into the flags register.
 */
static inline void native_restore_fl(unsigned long flags)
{
	asm volatile("push %0 ; popf"
		     : /* no output */
		     :"g" (flags)
		     :"memory", "cc");
}

/* Save the flags register and immediately restore it; always exits with 0. */
int main(int argc, char *argv[])
{
	unsigned long flags = native_save_fl();

	native_restore_fl(flags);
	return 0;
}
  • 用gcc编译并反汇编
veli@idorax:/tmp$ gcc -g -Wall -o foo foo.c
veli@idorax:/tmp$
veli@idorax:/tmp$ gdb foo
GNU gdb (Ubuntu 7.11.1-0ubuntu1~16.5) 7.11.1
...<snip>...................................
(gdb) set disassembly-flavor intel
(gdb)
(gdb) disas /m main
Dump of assembler code for function main:
{
0x00000000004004f5 <+0>: push rbp
0x00000000004004f6 <+1>: mov rbp,rsp
0x00000000004004f9 <+4>: sub rsp,0x20
0x00000000004004fd <+8>: mov DWORD PTR [rbp-0x14],edi
0x0000000000400500 <+11>: mov QWORD PTR [rbp-0x20],rsi

unsigned long flags = native_save_fl();
0x0000000000400504 <+15>: call 0x4004d6 <native_save_fl>
0x0000000000400509 <+20>: mov QWORD PTR [rbp-0x8],rax

native_restore_fl(flags);
0x000000000040050d <+24>: mov rax,QWORD PTR [rbp-0x8]
0x0000000000400511 <+28>: mov rdi,rax
0x0000000000400514 <+31>: call 0x4004e6 <native_restore_fl>

return 0;
0x0000000000400519 <+36>: mov eax,0x0

}
0x000000000040051e <+41>: leave
0x000000000040051f <+42>: ret

End of assembler dump.
(gdb) #
(gdb) disas /m native_save_fl
Dump of assembler code for function native_save_fl:
{
0x00000000004004d6 <+0>: push rbp
0x00000000004004d7 <+1>: mov rbp,rsp

unsigned long flags;

/*
 * "=rm" is safe here, because "pop" adjusts the stack before
 * it evaluates its effective address -- this is part of the
 * documented behavior of the "pop" instruction.
 */
asm volatile("# __raw_save_flags\n\t"
0x00000000004004da <+4>: pushf
0x00000000004004db <+5>: pop rax
0x00000000004004dc <+6>: mov QWORD PTR [rbp-0x8],rax

"pushf ; pop %0"
: "=rm" (flags)
: /* no input */
: "memory");

return flags;
0x00000000004004e0 <+10>: mov rax,QWORD PTR [rbp-0x8]

}
0x00000000004004e4 <+14>: pop rbp
0x00000000004004e5 <+15>: ret

End of assembler dump.
(gdb) #
(gdb) disas /m native_restore_fl
Dump of assembler code for function native_restore_fl:
{
0x00000000004004e6 <+0>: push rbp
0x00000000004004e7 <+1>: mov rbp,rsp
0x00000000004004ea <+4>: mov QWORD PTR [rbp-0x8],rdi

asm volatile("push %0 ; popf"
0x00000000004004ee <+8>: push QWORD PTR [rbp-0x8]
0x00000000004004f1 <+11>: popf

: /* no output */
:"g" (flags)
:"memory", "cc");
}
0x00000000004004f2 <+12>: nop
0x00000000004004f3 <+13>: pop rbp
0x00000000004004f4 <+14>: ret

End of assembler dump.
(gdb) q
veli@idorax:/tmp$

根据上面的反汇编代码不难看出,native_save_fl()和native_restore_fl()的实现异常简单。

  • native_save_fl()
; static inline unsigned long native_save_fl(void)

0x00000000004004da <+4>: pushf ; 把标志寄存器(FLAGS)压入栈(Stack)中
0x00000000004004db <+5>: pop rax ; 通过出栈操作把标志寄存器的值存入rax中
0x00000000004004dc <+6>: mov QWORD PTR [rbp-0x8],rax ; 把rax存入局部变量flags中
0x00000000004004e0 <+10>: mov rax,QWORD PTR [rbp-0x8] ; 根据ABI, 返回值总是存于rax中,这里等同于return flags
  • native_restore_fl()
; static inline void native_restore_fl(unsigned long flags)

0x00000000004004ea <+4>: mov QWORD PTR [rbp-0x8],rdi ; 根据ABI, 函数的第一个参数通过寄存器rdi传递
; 于是,等同于将第一个参数flags存入一个局部变量中
0x00000000004004ee <+8>: push QWORD PTR [rbp-0x8] ; 等同于将第一个参数flags压入栈中
0x00000000004004f1 <+11>: popf ; 通过出栈操作把flags的值恢复到标志寄存器FLAGS中

注意:操作标志寄存器FLAGS(16位: flags, 32位: eflags, 64位: rflags),必须通过pushf和popf这两个指令,而不能使用push和pop指令。

因此,我们可以得出如下结论,(在x86平台上)

  • local_irq_save()就是把标志寄存器保存到一个局部变量flags中,然后禁止中断;
  • local_irq_restore()则是通过局部变量flags的值恢复标志寄存器,中断状态随之恢复到保存时的状态(如果保存时中断是打开的,中断就会重新打开;否则仍保持禁止)。

4. arch_local_irq_disable()和arch_local_irq_enable()在x86上的实现

/* linux-4.14.12/arch/x86/include/asm/irqflags.h#80 */

  /* Disable local interrupts by delegating to native_irq_disable(). */
static inline notrace void arch_local_irq_disable(void)
{
	native_irq_disable();
}

/* Enable local interrupts by delegating to native_irq_enable(). */
static inline notrace void arch_local_irq_enable(void)
{
	native_irq_enable();
}

/* linux-4.14.12/arch/x86/include/asm/irqflags.h#42 */

/* Execute the "cli" instruction; "memory" is declared as clobbered. */
static inline void native_irq_disable(void)
{
	asm volatile("cli" : : : "memory");
}

/* Execute the "sti" instruction; "memory" is declared as clobbered. */
static inline void native_irq_enable(void)
{
	asm volatile("sti" : : : "memory");
}

从上面的代码可以看出,在x86中,

  • arch_local_irq_disable()的实质是执行汇编指令cli
  • arch_local_irq_enable()的实质则是执行汇编指令sti

到此为止,我们已经搞清楚了如下4个宏的作用。

  1. local_irq_disable() : 禁止本地中断传递。 在x86上,本质上是调用汇编指令cli;
  2. local_irq_enable() : 激活本地中断传递。 在x86上,本质上是调用汇编指令sti;
  3. local_irq_save() : 保存本地中断传递的当前状态,然后禁止本地中断传递。在x86上,本质上是调用pushf+pop先保存标志寄存器到一个变量flags中,然后调用汇编指令cli;
  4. local_irq_restore() : 恢复本地中断传递到给定的状态。在x86上,本质上是调用push+popf重置标志寄存器。

更多有关中断控制的细节,请阅读源代码和《Linux Kernel Development》一书的第7章:中断和中断处理。 常用的中断控制方法,如下图所示。

理解Linux内核之中断控制

If all you have is a hammer, everything looks like a nail. | 如果你拥有的东西就只有一把锤子,那么一切事物在你眼里都看起来是钉子。 (P.S. 保持Open的心态很重要啊)
上一篇:device host global 函数要求


下一篇:PNG,JPEG,BMP,JIF图片格式详解及其对比