CSAPP的BombLab实验

这篇文章记录一下做bomb实验的过程。这几天封宿舍,每天除了做核酸之外就不能出去。每天拆一个炸弹,算上彩蛋刚好用时一周。

首先使用objdump -d bomb > bomb.txt得到反汇编代码。
查看bomb.c文件可以看到有phase_1 ~ phase_6共6个输入函数,对应6个炸弹。下面从第一个开始拆除。

文章目录

phase_1

phase_1中调用了strings_not_equal,后者又调用了string_length,所以从string_length看起,翻译到C语言如下:

/*
 * Return the number of characters in the NUL-terminated string `str`,
 * not counting the terminator. A null `str` is treated as length 0.
 *
 * Register mapping noted from the disassembly:
 * str in %rdi, length in %rax, p in %rdx
 */
int string_length(char *str) {
    if (!str) {
        return 0;
    }
    char *p = str;
    /* Stop on the terminating character, not on the pointer value. */
    while (*p != '\0') {
        p++;
    }
    return (int)(p - str);
}

然后再看strings_not_equal,翻译到C语言如下:

/*
 * Return 1 if the two strings differ, 0 if they are identical.
 * The code was lightly restructured for readability without changing
 * the meaning of the assembly it was translated from.
 *
 * Register mapping noted from the disassembly:
 * str1 in %rdi, str2 in %rsi
 * str1_len in %r12, p1 in %rbx, p2 in %rbp
 */
int strings_not_equal(char *str1, char *str2) {
    int str1_len = string_length(str1);
    int str2_len = string_length(str2);

    /* Different lengths can never be equal. */
    if (str1_len != str2_len) {
        return 1;
    }

    char *p1 = str1;
    char *p2 = str2;
    /* Lengths match, so reaching the terminator of str1 also ends str2. */
    while (*p1 != '\0') {
        if (*p1 != *p2) {
            return 1;
        }
        /* advance to the next character of both strings */
        p1++;
        p2++;
    }
    return 0;
}

做好这些准备开始看phase_1

0000000000400ee0 <phase_1>:
  400ee0:	48 83 ec 08          	sub    $0x8,%rsp
  400ee4:	be 00 24 40 00       	mov    $0x402400,%esi
  400ee9:	e8 4a 04 00 00       	call   401338 <strings_not_equal>
  400eee:	85 c0                	test   %eax,%eax
  400ef0:	74 05                	je     400ef7 <phase_1+0x17>
  400ef2:	e8 43 05 00 00       	call   40143a <explode_bomb>
  400ef7:	48 83 c4 08          	add    $0x8,%rsp
  400efb:	c3                   	ret    

test + je的组合意思是若%eax == 0,则跳转。phase_1很简单,就是比较一下输入的字符串和内存中的一个字符串是否相等。可以看到输入的字符串应该等于内存0x402400处的字符串,通过gdb的x/s 0x402400查看得到该处字符串为Border relations with Canada have never been better.

phase_2

read_six_numbers函数中调用了sscanf。注意到有一句mov $0x4025c3, %esi,通过x/s 0x4025c3查看该字符串为"%d %d %d %d %d %d",因此该函数读取6个整数,并且可以看到是从%rsi中依次读取。若其返回值小于6,即读取的值少于6个则会引爆炸弹。因此最终可以翻译为:

/*
 * Parse six whitespace-separated decimal integers from `input` into
 * arr[0..5]. Calls explode_bomb() when fewer than six values were
 * converted, and returns the number of converted values.
 *
 * arr arrives in %rsi (phase_2 passes its stack pointer there);
 * input is presumably still in %rdi from the caller -- TODO confirm.
 */
int read_six_numbers(char *input, int* arr) {
    int *slot = arr;
    int converted = sscanf(input, "%d %d %d %d %d %d",
                           slot, slot + 1, slot + 2,
                           slot + 3, slot + 4, slot + 5);
    if (converted <= 5) {
        explode_bomb();
    }
    return converted;
}

phase_2代码如下:

0000000000400efc <phase_2>:
  400efc:	55                   	push   %rbp
  400efd:	53                   	push   %rbx
  400efe:	48 83 ec 28          	sub    $0x28,%rsp
  400f02:	48 89 e6             	mov    %rsp,%rsi
  400f05:	e8 52 05 00 00       	call   40145c <read_six_numbers>
  400f0a:	83 3c 24 01          	cmpl   $0x1,(%rsp)
  400f0e:	74 20                	je     400f30 <phase_2+0x34>
  400f10:	e8 25 05 00 00       	call   40143a <explode_bomb>
  400f15:	eb 19                	jmp    400f30 <phase_2+0x34>
  400f17:	8b 43 fc             	mov    -0x4(%rbx),%eax
  400f1a:	01 c0                	add    %eax,%eax
  400f1c:	39 03                	cmp    %eax,(%rbx)
  400f1e:	74 05                	je     400f25 <phase_2+0x29>
  400f20:	e8 15 05 00 00       	call   40143a <explode_bomb>
  400f25:	48 83 c3 04          	add    $0x4,%rbx
  400f29:	48 39 eb             	cmp    %rbp,%rbx
  400f2c:	75 e9                	jne    400f17 <phase_2+0x1b>
  400f2e:	eb 0c                	jmp    400f3c <phase_2+0x40>
  400f30:	48 8d 5c 24 04       	lea    0x4(%rsp),%rbx
  400f35:	48 8d 6c 24 18       	lea    0x18(%rsp),%rbp
  400f3a:	eb db                	jmp    400f17 <phase_2+0x1b>
  400f3c:	48 83 c4 28          	add    $0x28,%rsp
  400f40:	5b                   	pop    %rbx
  400f41:	5d                   	pop    %rbp
  400f42:	c3                   	ret    

调试分析phase_2,首先需要输入6个整数,以空格隔开。然后要满足第一个数字 $n_1$ 必须是1,且后一个数字 $n_{i+1}$ 是前一个数字 $n_i$ 的两倍,即 $n_{i+1} = 2 n_i$。翻译如下:

/*
 * Phase 2: the input must hold six integers forming the sequence
 * 1, 2, 4, ... where every element is twice its predecessor.
 * Any violation calls explode_bomb().
 */
void phase_2(char *input) {
    int arr[6];
    read_six_numbers(input, arr);

    /* the first number has to be 1 */
    if (arr[0] != 1) {
        explode_bomb();
    }

    /* Walk the array with a cursor and an end pointer, the way the
     * disassembly does with %rbx and %rbp. */
    int *end = arr + 6;
    for (int *cur = arr + 1; cur != end; cur++) {
        int expected = 2 * cur[-1];
        /* each number has to be double the one before it */
        if (*cur != expected) {
            explode_bomb();
        }
    }
}

因此答案则为1 2 4 8 16 32

phase_3

代码如下:

0000000000400f43 <phase_3>:
  400f43:	48 83 ec 18          	sub    $0x18,%rsp
  400f47:	48 8d 4c 24 0c       	lea    0xc(%rsp),%rcx
  400f4c:	48 8d 54 24 08       	lea    0x8(%rsp),%rdx
  400f51:	be cf 25 40 00       	mov    $0x4025cf,%esi
  400f56:	b8 00 00 00 00       	mov    $0x0,%eax
  400f5b:	e8 90 fc ff ff       	call   400bf0 <__isoc99_sscanf@plt>
  400f60:	83 f8 01             	cmp    $0x1,%eax
  400f63:	7f 05                	jg     400f6a <phase_3+0x27>
  400f65:	e8 d0 04 00 00       	call   40143a <explode_bomb>
  400f6a:	83 7c 24 08 07       	cmpl   $0x7,0x8(%rsp)
  400f6f:	77 3c                	ja     400fad <phase_3+0x6a>
  400f71:	8b 44 24 08          	mov    0x8(%rsp),%eax
  400f75:	ff 24 c5 70 24 40 00 	jmp    *0x402470(,%rax,8)
  400f7c:	b8 cf 00 00 00       	mov    $0xcf,%eax
  400f81:	eb 3b                	jmp    400fbe <phase_3+0x7b>
  400f83:	b8 c3 02 00 00       	mov    $0x2c3,%eax
  400f88:	eb 34                	jmp    400fbe <phase_3+0x7b>
  400f8a:	b8 00 01 00 00       	mov    $0x100,%eax
  400f8f:	eb 2d                	jmp    400fbe <phase_3+0x7b>
  400f91:	b8 85 01 00 00       	mov    $0x185,%eax
  400f96:	eb 26                	jmp    400fbe <phase_3+0x7b>
  400f98:	b8 ce 00 00 00       	mov    $0xce,%eax
  400f9d:	eb 1f                	jmp    400fbe <phase_3+0x7b>
  400f9f:	b8 aa 02 00 00       	mov    $0x2aa,%eax
  400fa4:	eb 18                	jmp    400fbe <phase_3+0x7b>
  400fa6:	b8 47 01 00 00       	mov    $0x147,%eax
  400fab:	eb 11                	jmp    400fbe <phase_3+0x7b>
  400fad:	e8 88 04 00 00       	call   40143a <explode_bomb>
  400fb2:	b8 00 00 00 00       	mov    $0x0,%eax
  400fb7:	eb 05                	jmp    400fbe <phase_3+0x7b>
  400fb9:	b8 37 01 00 00       	mov    $0x137,%eax
  400fbe:	3b 44 24 0c          	cmp    0xc(%rsp),%eax
  400fc2:	74 05                	je     400fc9 <phase_3+0x86>
  400fc4:	e8 71 04 00 00       	call   40143a <explode_bomb>
  400fc9:	48 83 c4 18          	add    $0x18,%rsp
  400fcd:	c3                   	ret    

注意第五行,首先查看0x4025cf处的字符串为"%d %d",确定读入2个整数。通过调试确定第一个参数位于%rsp + 8,第二个参数位于%rsp + 0xc处。且第一个参数不能大于7。
对于间接跳转指令jmp *0x402470(, %rax, 8),使用x/wx 0x402470查看其值为0x400f7c,并且使用相同的命令查看switch各个情况的跳转目标地址。

注意间接跳转指令的含义是跳转到0x402470 + 8 * %rax存储的地址处,而非0x402470存储的地址再加8 * %rax处。

另外,注意mov和lea的区别。mov 0x0(%rsp), %rax是把%rsp所指向内存处的数据给%rax,而lea 0x0(%rsp), %rax是把%rsp的值(即地址本身)给%rax

下面是对应的C语言:

/*
 * Phase 3: the input must hold two integers "a b". `a` selects a case
 * of a jump-table switch (valid values 0..7) and `b` must equal the
 * constant loaded by that case; otherwise explode_bomb() is called.
 */
void phase_3(char *input) {
    int a, b;
    int scan_num = sscanf(input, "%d %d", &a, &b);
    if (scan_num < 2) {
        explode_bomb();
    }

    /* Starts at 0 so it is never read uninitialized; the disassembly
     * also loads 0 into %eax on the out-of-range path. */
    int eax = 0;
    switch (a) {
    /* jump-table slot 0x402470 holds 0x400f7c */
    case 0:
        eax = 0xcf;
        break;
    /* jump-table slot 0x402478 holds 0x400fb9 */
    case 1:
        eax = 0x137;
        break;
    /* jump-table slot 0x402480 holds 0x400f83 */
    case 2:
        eax = 0x2c3;
        break;
    /* jump-table slot 0x402488 holds 0x400f8a */
    case 3:
        eax = 0x100;
        break;
    /* jump-table slot 0x402490 holds 0x400f91 */
    case 4:
        eax = 0x185;
        break;
    /* jump-table slot 0x402498 holds 0x400f98 */
    case 5:
        eax = 0xce;
        break;
    /* jump-table slot 0x4024a0 holds 0x400f9f */
    case 6:
        eax = 0x2aa;
        break;
    /* jump-table slot 0x4024a8 holds 0x400fa6 */
    case 7:
        eax = 0x147;
        break;
    /* a < 0 or a > 7: the unsigned `ja` check in the disassembly */
    default:
        explode_bomb();
        eax = 0;
        break;
    }
    if (b != eax) {
        explode_bomb();
    }
}

因此以下答案都是正确的:(0, 207) (1, 311) (2, 707) (3, 256) (4, 389) (5, 206) (6, 682) (7, 327)

phase_4

phase_4调用了func4

0000000000400fce <func4>:
  400fce:	48 83 ec 08          	sub    $0x8,%rsp
  400fd2:	89 d0                	mov    %edx,%eax
  400fd4:	29 f0                	sub    %esi,%eax
  400fd6:	89 c1                	mov    %eax,%ecx
  400fd8:	c1 e9 1f             	shr    $0x1f,%ecx
  400fdb:	01 c8                	add    %ecx,%eax
  400fdd:	d1 f8                	sar    %eax
  400fdf:	8d 0c 30             	lea    (%rax,%rsi,1),%ecx
  400fe2:	39 f9                	cmp    %edi,%ecx
  400fe4:	7e 0c                	jle    400ff2 <func4+0x24>
  400fe6:	8d 51 ff             	lea    -0x1(%rcx),%edx
  400fe9:	e8 e0 ff ff ff       	call   400fce <func4>
  400fee:	01 c0                	add    %eax,%eax
  400ff0:	eb 15                	jmp    401007 <func4+0x39>
  400ff2:	b8 00 00 00 00       	mov    $0x0,%eax
  400ff7:	39 f9                	cmp    %edi,%ecx
  400ff9:	7d 0c                	jge    401007 <func4+0x39>
  400ffb:	8d 71 01             	lea    0x1(%rcx),%esi
  400ffe:	e8 cb ff ff ff       	call   400fce <func4>
  401003:	8d 44 00 01          	lea    0x1(%rax,%rax,1),%eax
  401007:	48 83 c4 08          	add    $0x8,%rsp
  40100b:	c3                   	ret    

注意第6行shr是逻辑右移指令,在C语言实现时应该先转为unsigned类型再右移。另外,只带一个操作数的sar %eax表示的是sar $1, %eax,即算术右移1位。对应的C语言函数如下:

/*
 * Binary-search style recursion over the range [lo, hi] looking for a.
 * Returns 0 when the midpoint equals a; otherwise recurses into the
 * lower half (result doubled) or the upper half (result doubled plus 1).
 *
 * Adding the sign bit before the arithmetic shift makes the halving
 * round toward zero, so the midpoint is still right when hi - lo is
 * negative.
 *
 * Register mapping noted from the disassembly:
 * hi in %edx, lo in %esi, a in %edi
 */
int func4(int hi, int lo, int a) {
    int span = hi - lo;
    /* logical shift by 31 isolates the sign bit (0 or 1) */
    int sign = (int)((unsigned)span >> 31);
    /* sar %eax: arithmetic shift right by one */
    int half = (span + sign) >> 1;
    int mid = lo + half;

    if (mid > a) {
        /* target lies below the midpoint: lower the upper bound */
        return 2 * func4(mid - 1, lo, a);
    }
    if (mid < a) {
        /* target lies above the midpoint: raise the lower bound */
        return 2 * func4(hi, mid + 1, a) + 1;
    }
    return 0;
}

下面再看phase_4代码:

000000000040100c <phase_4>:
  40100c:	48 83 ec 18          	sub    $0x18,%rsp
  401010:	48 8d 4c 24 0c       	lea    0xc(%rsp),%rcx
  401015:	48 8d 54 24 08       	lea    0x8(%rsp),%rdx
  40101a:	be cf 25 40 00       	mov    $0x4025cf,%esi
  40101f:	b8 00 00 00 00       	mov    $0x0,%eax
  401024:	e8 c7 fb ff ff       	call   400bf0 <__isoc99_sscanf@plt>
  401029:	83 f8 02             	cmp    $0x2,%eax
  40102c:	75 07                	jne    401035 <phase_4+0x29>
  40102e:	83 7c 24 08 0e       	cmpl   $0xe,0x8(%rsp)
  401033:	76 05                	jbe    40103a <phase_4+0x2e>
  401035:	e8 00 04 00 00       	call   40143a <explode_bomb>
  40103a:	ba 0e 00 00 00       	mov    $0xe,%edx
  40103f:	be 00 00 00 00       	mov    $0x0,%esi
  401044:	8b 7c 24 08          	mov    0x8(%rsp),%edi
  401048:	e8 81 ff ff ff       	call   400fce <func4>
  40104d:	85 c0                	test   %eax,%eax
  40104f:	75 07                	jne    401058 <phase_4+0x4c>
  401051:	83 7c 24 0c 00       	cmpl   $0x0,0xc(%rsp)
  401056:	74 05                	je     40105d <phase_4+0x51>
  401058:	e8 dd 03 00 00       	call   40143a <explode_bomb>
  40105d:	48 83 c4 18          	add    $0x18,%rsp
  401061:	c3                   	ret    

和phase_3一样,先读取两个整数。
下面是对应的C语言:

/*
 * Phase 4: the input must hold exactly two integers "a b" with
 * 0 <= a <= 14. The bomb is defused only when func4(14, 0, a)
 * returns 0 and b is 0; otherwise explode_bomb() is called.
 */
void phase_4(char *input) {
    int a, b;
    int scan_num = sscanf(input, "%d %d", &a, &b);

    /* The disassembly uses `cmpl $0xe` + `jbe`, an unsigned a <= 14
     * test, so 14 passes the range check and negative values fail it.
     * Short-circuiting keeps `a` from being read when parsing failed. */
    if (scan_num != 2 || a < 0 || a > 14) {
        explode_bomb();
    }

    int result = func4(14, 0, a);
    if (result != 0 || b != 0) {
        explode_bomb();
    }
}

注意到func4里只有在更新下界的时候会让result+1,因此输入的第一个参数a在满足边界[0, 14](汇编中是cmpl $0xe加jbe的无符号比较,即0 <= a <= 14)的前提下,只要满足调用的所有(包括递归)func4都不调整下界即可。最显而易见的答案是7,因为phase_4中是这样调用的:func4(14, 0, a),第一次计算出的中点就是7,一次找到就不会调整下界。
因此最终a可选的值包括:0, 1, 3, 7;而b只能取0

phase_5

0000000000401062 <phase_5>:
  401062:	53                   	push   %rbx
  401063:	48 83 ec 20          	sub    $0x20,%rsp
  401067:	48 89 fb             	mov    %rdi,%rbx
  40106a:	64 48 8b 04 25 28 00 	mov    %fs:0x28,%rax
  401071:	00 00 
  401073:	48 89 44 24 18       	mov    %rax,0x18(%rsp)
  401078:	31 c0                	xor    %eax,%eax
  40107a:	e8 9c 02 00 00       	call   40131b <string_length>
  40107f:	83 f8 06             	cmp    $0x6,%eax
  401082:	74 4e                	je     4010d2 <phase_5+0x70>
  401084:	e8 b1 03 00 00       	call   40143a <explode_bomb>
  401089:	eb 47                	jmp    4010d2 <phase_5+0x70>
  40108b:	0f b6 0c 03          	movzbl (%rbx,%rax,1),%ecx
  40108f:	88 0c 24             	mov    %cl,(%rsp)
  401092:	48 8b 14 24          	mov    (%rsp),%rdx
  401096:	83 e2 0f             	and    $0xf,%edx
  401099:	0f b6 92 b0 24 40 00 	movzbl 0x4024b0(%rdx),%edx
  4010a0:	88 54 04 10          	mov    %dl,0x10(%rsp,%rax,1)
  4010a4:	48 83 c0 01          	add    $0x1,%rax
  4010a8:	48 83 f8 06          	cmp    $0x6,%rax
  4010ac:	75 dd                	jne    40108b <phase_5+0x29>
  4010ae:	c6 44 24 16 00       	movb   $0x0,0x16(%rsp)
  4010b3:	be 5e 24 40 00       	mov    $0x40245e,%esi
  4010b8:	48 8d 7c 24 10       	lea    0x10(%rsp),%rdi
  4010bd:	e8 76 02 00 00       	call   401338 <strings_not_equal>
  4010c2:	85 c0                	test   %eax,%eax
  4010c4:	74 13                	je     4010d9 <phase_5+0x77>
  4010c6:	e8 6f 03 00 00       	call   40143a <explode_bomb>
  4010cb:	0f 1f 44 00 00       	nopl   0x0(%rax,%rax,1)
  4010d0:	eb 07                	jmp    4010d9 <phase_5+0x77>
  4010d2:	b8 00 00 00 00       	mov    $0x0,%eax
  4010d7:	eb b2                	jmp    40108b <phase_5+0x29>
  4010d9:	48 8b 44 24 18       	mov    0x18(%rsp),%rax
  4010de:	64 48 33 04 25 28 00 	xor    %fs:0x28,%rax
  4010e5:	00 00 
  4010e7:	74 05                	je     4010ee <phase_5+0x8c>
  4010e9:	e8 42 fa ff ff       	call   400b30 <__stack_chk_fail@plt>
  4010ee:	48 83 c4 20          	add    $0x20,%rsp
  4010f2:	5b                   	pop    %rbx
  4010f3:	c3                   	ret    

对于4010b3处的指令,查看内存0x40245e处的字符串为flyers
对于401099处的指令,查看内存0x4024b0处的字符串为maduiersnfotvbylSo you think you can stop the bomb with ctrl-c, do you?,由于只有低四位是可变的,因此用到的只有从头开始的16个字符,即maduiersnfotvbyl

另外,在运行Bomb时使用ctrl-c命令结束进程时会显示后面那句话,不过Dr.evil犹豫一会儿就会让我们成功结束,他会说:“Well…OK.”

上一篇:alley


下一篇:HiveSQL_添加数据load