Bomb Lab [Updated 1/12/16] (README, Writeup, Release Notes, Self-Study Handout)

A “binary bomb” is a program provided to students as an object code file. When run, it prompts the user to type in 6 different strings. If any of these is incorrect, the bomb “explodes,” printing an error message and logging the event on a grading server. Students must “defuse” their own unique bomb by disassembling and reverse engineering the program to determine what the 6 strings should be. The lab teaches students to understand assembly language, and also forces them to learn how to use a debugger. It’s also great fun. A legendary lab among the CMU undergrads.

Here’s a Linux/x86-64 binary bomb that you can try out for yourself. The feature that notifies the grading server has been disabled, so feel free to explode this bomb with impunity. If you’re an instructor with a CS:APP account, then you can download the solution.

实验作业网址:http://csapp.cs.cmu.edu/3e/labs.html

前言

本篇博客将会展示 CSAPP 之 BombLab 的拆弹过程,粉碎 Dr.Evil 的邪恶阴谋。Dr.Evil 总共设置了 6 个炸弹,每个炸弹对应一串字符串,如果字符串错误,炸弹就会被引爆。本 lab 主题为逆向工程,涉及主题包括 switch 结构、数组哈希、链表、二叉树数据结构的逆向,以及简单控制流、二分算法等的逆向。


网上许多教程都是基于naive gdb来做的,但Beej’s Quick Guide to GDB中作者都说从来不会直接使用普通模式的gdb。
本文基于IDA pro和pwndbg:

  • IDA:强大的反汇编(反编译)工具,指令流图和F5反编译用起来太爽啦。
  • pwndbg:gdb的插件,可以在调试时显示寄存器、指令列表、栈、backstrace等信息和辅助提示信息。

注意:

  • pwndbg与naive gdb使用上有些不同,比如不能使用disas ,得用disassemble ,输出的格式也不是gdb默认的AT&T格式,而是intel格式,这与IDA反汇编格式相同,看起来更方便。
  • 使用naive gdb对熟练gdb很有好处,但作为使用十个手指的人类,我还是更喜欢看IDA强大的反编译工具,和pwndbg辅助插件。

phase_1

pwndbg>disassemble main 可以看到main()函数反汇编结果,可以观察到:

1
2
3
4
0x0000000000400e32 <+146>:   call   0x40149e <read_line>
0x0000000000400e37 <+151>: mov rdi,rax
0x0000000000400e3a <+154>: call 0x400ee0 <phase_1>
0x0000000000400e3f <+159>: call 0x4015c4 <phase_defused>

read_line的返回值rax是读入的字符串地址,传给了rdi,应当是要作为phase_1的参数,里面存放的是我们的输入值。查看phase_1的反汇编代码,可知正确答案放在0x401338的位置,赋给esi后作为strings_not_equal函数的参数。

1
2
3
4
5
6
7
8
──────────────────────────────────────────────────────────────────────────────[ DISASM / x86-64 / set emulate on ]───────────────────────────────────────────────────────────────────────────────
0x400ee0 <phase_1> sub rsp, 8
0x400ee4 <phase_1+4> mov esi, 0x402400
0x400ee9 <phase_1+9> call strings_not_equal <strings_not_equal>
rdi: 0x603780 (input_strings) ◂— 0x363636616b6a /* 'jka666' */
rsi: 0x402400 ◂— outsd dx, dword ptr [rsi] /* 'Border relations with Canada have never been better.' */
rdx: 0x1
rcx: 0x6

IDA pro反编译结果:

1
2
3
4
5
6
7
8
9
__int64 __fastcall phase_1(__int64 a1)
{
__int64 result; // rax

result = strings_not_equal(a1, "Border relations with Canada have never been better.");
if ( (_DWORD)result )
explode_bomb(a1, "Border relations with Canada have never been better.");
return result;
}

phase_2

使用IDA pro 对phase_2()read_six_numbers()反编译结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
__int64 __fastcall phase_2(__int64 a1)  // a1 = RDI = RAX 是readline的返回字符串地址
{
__int64 result; // rax
char *v2; // rbx
int v3; // [rsp+0h] [rbp-38h] BYREF // 特意将6字节数组表示为3个变量
char v4; // [rsp+4h] [rbp-34h] BYREF // 方便循环特殊情况表示
char v5; // [rsp+18h] [rbp-20h] BYREF //

read_six_numbers(a1, &v3); // read_six_number会将读入结果放到rsp往后18h=24个字节
if ( v3 != 1 ) // 首元素需为1
explode_bomb(a1, &v3);
v2 = &v4;
do
{
result = (unsigned int)(2 * *((_DWORD *)v2 - 1)); // result = 2*arr[i-1]
if ( *(_DWORD *)v2 != (_DWORD)result )
explode_bomb(a1, &v3);
v2 += 4; // 转成_DWORD*加1, 或char*加4
}
while ( v2 != &v5 );
return result;
}

__int64 __fastcall read_six_numbers(__int64 a1, __int64 a2)
{
__int64 result; // rax

result = __isoc99_sscanf(a1, "%d %d %d %d %d %d", a2, a2 + 4, a2 + 8, a2 + 12, a2 + 16, a2 + 20);
if ( (int)result <= 5 )
explode_bomb(a1, "%d %d %d %d %d %d");
return result;
}

程序会使用sscanf() 从rax地址的字符串中读取6个整数到rsp往后的24=18h个字节处,读入第一个数字需要为1,后面每个数字需要是前一个数字的2倍。

phase_3

反编译 phase_3()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
__int64 __fastcall phase_3(__int64 a1)
{
__int64 v1; // rdx
__int64 v2; // rcx
__int64 result; // rax
int v4; // [rsp+8h] [rbp-10h] BYREF
int v5; // [rsp+Ch] [rbp-Ch] BYREF

if ( (int)__isoc99_sscanf(a1, "%d %d", &v4, &v5) <= 1 )
explode_bomb(a1, "%d %d", v1, v2);
switch ( v4 )
{
case 0:
result = 207LL;
break;
case 1:
result = 311LL;
break;
case 2:
result = 707LL;
break;
case 3:
result = 256LL;
break;
case 4:
result = 389LL;
break;
case 5:
result = 206LL;
break;
case 6:
result = 682LL;
break;
case 7:
result = 327LL;
break;
default:
explode_bomb(a1, "%d %d", v1, v2);
return result;
}
if ( (_DWORD)result != v5 )
explode_bomb(a1, "%d %d");
return result;
}

phase_4

phase_4()func4() 的反编译结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
__int64 __fastcall phase_4(__int64 a1)
{
__int64 result; // rax
unsigned int v2; // [rsp+8h] [rbp-10h] BYREF // IDA是根据jbe判断为无符号数的
int v3; // [rsp+Ch] [rbp-Ch] BYREF

if ( (unsigned int)__isoc99_sscanf(a1, "%d %d", &v2, &v3) != 2 || v2 > 0xE )
explode_bomb();
result = func4(v2, 0LL, 14LL);
if ( (_DWORD)result || v3 )
explode_bomb();
return result;
}

__int64 __fastcall func4(__int64 a1, __int64 a2, __int64 a3)
{
int v3; // ecx
__int64 result; // rax

v3 = ((int)a3 - (int)a2) / 2 + a2; // v3 = (a2+a3)/2 = 7L
if ( v3 > (int)a1 )
return 2 * (unsigned int)func4(a1, a2, (unsigned int)(v3 - 1));
result = 0LL;
if ( v3 < (int)a1 )
result = 2 * (unsigned int)func4(a1, (unsigned int)(v3 + 1), a3) + 1;
return result;
}

观察 phase_4() ,输入的第一个数字v2不能大于14,第二个数字v3必须为0,且func4()必须返回0。

观察 func4(),其实就可以获得一组解:7 0 ,结果进行再整理一下可以看出是个二分算法(但具体是干啥的猜不出,欢迎大神在评论区解答)。

1
2
3
4
5
6
7
8
9
10
int func4(int n, int i, int j) {
if (val == n) return 0;

int val = (j + i) / 2;
if (val <= n) {
return 2 * func4(n, val+1, j) + 1;
} else {
return 2 * func4(n, i, val-1);
}
}

phase_4共有4组解:

1
2
3
4
0 0
1 0
3 0
7 0

phase_5

反编译代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
unsigned __int64 __fastcall phase_5(__int64 a1, const char *a2)
{
__int64 i; // rax
char v4[8]; // [rsp+10h] [rbp-18h] BYREF
unsigned __int64 v5; // [rsp+18h] [rbp-10h]

v5 = __readfsqword(0x28u);
if ( (unsigned int)string_length() != 6 )
explode_bomb(a1, a2);
for ( i = 0LL; i != 6; ++i )
v4[i] = array_3449[*(_BYTE *)(a1 + i) & 0xF];
v4[6] = 0;
if ( (unsigned int)strings_not_equal(v4, "flyers") )
explode_bomb(v4, "flyers");
return __readfsqword(0x28u) ^ v5;
}

实际上还要参考反汇编代码和调试运行时信息才能把这题做好:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
pwndbg> disassemble phase_5
Dump of assembler code for function phase_5:
0x0000000000401062 <+0>: push rbx
0x0000000000401063 <+1>: sub rsp,0x20
0x0000000000401067 <+5>: mov rbx,rdi ; rbx放输入字符串
=> 0x000000000040106a <+8>: mov rax,QWORD PTR fs:0x28
0x0000000000401073 <+17>: mov QWORD PTR [rsp+0x18],rax ; 防护,设置一个canary,可以不管它
0x0000000000401078 <+22>: xor eax,eax
0x000000000040107a <+24>: call 0x40131b <string_length>
0x000000000040107f <+29>: cmp eax,0x6
0x0000000000401082 <+32>: je 0x4010d2 <phase_5+112>
0x0000000000401084 <+34>: call 0x40143a <explode_bomb>
----------------------------
0x0000000000401089 <+39>: jmp 0x4010d2 <phase_5+112>
---------------------------- for (参考IDA反编译代码)
0x000000000040108b <+41>: movzx ecx,BYTE PTR [rbx+rax*1] ; 逐字符访问输入
0x000000000040108f <+45>: mov BYTE PTR [rsp],cl
0x0000000000401092 <+48>: mov rdx,QWORD PTR [rsp]
0x0000000000401096 <+52>: and edx,0xf ; 取字符低4
0x0000000000401099 <+55>: movzx edx,BYTE PTR [rdx+0x4024b0]
0x00000000004010a0 <+62>: mov BYTE PTR [rsp+rax*1+0x10],dl
0x00000000004010a4 <+66>: add rax,0x1
0x00000000004010a8 <+70>: cmp rax,0x6
0x00000000004010ac <+74>: jne 0x40108b <phase_5+41>
----------------------------
0x00000000004010ae <+76>: mov BYTE PTR [rsp+0x16],0x0
0x00000000004010b3 <+81>: mov esi,0x40245e ; "flyers"
0x00000000004010b8 <+86>: lea rdi,[rsp+0x10]
0x00000000004010bd <+91>: call 0x401338 <strings_not_equal>
0x00000000004010c2 <+96>: test eax,eax
0x00000000004010c4 <+98>: je 0x4010d9 <phase_5+119>
0x00000000004010c6 <+100>: call 0x40143a <explode_bomb>
0x00000000004010cb <+105>: nop DWORD PTR [rax+rax*1+0x0]
0x00000000004010d0 <+110>: jmp 0x4010d9 <phase_5+119>
----------------------------
0x00000000004010d2 <+112>: mov eax,0x0
0x00000000004010d7 <+117>: jmp 0x40108b <phase_5+41>
----------------------------
0x00000000004010d9 <+119>: mov rax,QWORD PTR [rsp+0x18]
0x00000000004010de <+124>: xor rax,QWORD PTR fs:0x28
0x00000000004010e7 <+133>: je 0x4010ee <phase_5+140>
0x00000000004010e9 <+135>: call 0x400b30 <__stack_chk_fail@plt>
0x00000000004010ee <+140>: add rsp,0x20
0x00000000004010f2 <+144>: pop rbx
0x00000000004010f3 <+145>: ret

pwndbg> x/s 0x4024b0
0x4024b0 <array.3449>: "maduiersnfotvbylSo you think you can stop the bomb with ctrl-c, do you?"

或在IDA中去看这段数据:
.rodata:00000000004024B0 array_3449 db 6Dh ; DATA XREF: phase_5+37↑r
.rodata:00000000004024B1 db 61h ; a
.rodata:00000000004024B2 db 64h ; d
.rodata:00000000004024B3 db 75h ; u
.rodata:00000000004024B4 db 69h ; i
.rodata:00000000004024B5 db 65h ; e
.rodata:00000000004024B6 db 72h ; r
.rodata:00000000004024B7 db 73h ; s
.rodata:00000000004024B8 db 6Eh ; n
.rodata:00000000004024B9 db 66h ; f
.rodata:00000000004024BA db 6Fh ; o
.rodata:00000000004024BB db 74h ; t
.rodata:00000000004024BC db 76h ; v
.rodata:00000000004024BD db 62h ; b
.rodata:00000000004024BE db 79h ; y
.rodata:00000000004024BF db 6Ch ; l
.rodata:00000000004024C0 ; const char aSoYouThinkYouC[]
.rodata:00000000004024C0 aSoYouThinkYouC db 'So you think you can stop the bomb with ctrl-c, do you?',0
.rodata:00000000004024C0 ; DATA XREF: sig_handler+4↑o
.rodata:00000000004024F8 ; const char aCursesYouVeFou[]
.rodata:00000000004024F8 aCursesYouVeFou db 'Curses, you',27h,'ve found the secret phase!',0
.rodata:00000000004024F8 ; DATA XREF: phase_defused+53↑o

看反汇编代码时可参考IDA生成的指令流图:

Untitled

程序意图为:读取一个长度为6的字符串,对于每个字符截取后四位数字,用来作为index获取另一个字符串里对应的字符,并保存起来,产生一个新的长度为6的字符串,要求等于另一个字符串。

根据IDA中.rodata中0x4024b0处数据可以直观看到:”flyers”各字符在0x4024b0字符串中分别由索引9FE567 获得,输入的字符串后4位需分别为这些数。参考ASCII表:

Untitled

可构造出输入:9?>567


前排提示:phase_6 和 secret_phase 难度稍大,想速通的玩家可以先跳过。

phase_6

对于涉及复杂数据结构的程序,IDA的反编译结果可读性不佳(反编译代码甚至比反汇编还长),这里不妨先看看反汇编的结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
pwndbg> disassemble phase_6
Dump of assembler code for function phase_6:
0x00000000004010f4 <+0>: push r14
0x00000000004010f6 <+2>: push r13
0x00000000004010f8 <+4>: push r12
0x00000000004010fa <+6>: push rbp
0x00000000004010fb <+7>: push rbx
0x00000000004010fc <+8>: sub rsp,0x50
0x0000000000401100 <+12>: mov r13,rsp
0x0000000000401103 <+15>: mov rsi,rsp
0x0000000000401106 <+18>: call 0x40145c <read_six_numbers> // 6个输入数字读在rsp位置
---------------------------- 外层 for
0x000000000040110b <+23>: mov r14,rsp
0x000000000040110e <+26>: mov r12d,0x0
0x0000000000401114 <+32>: mov rbp,r13
0x0000000000401117 <+35>: mov eax,DWORD PTR [r13+0x0] ; rbp,r14,r13都等于rsp, rsi在进入read函数后被改
0x000000000040111b <+39>: sub eax,0x1
0x000000000040111e <+42>: cmp eax,0x5
0x0000000000401121 <+45>: jbe 0x401128 <phase_6+52> ; 6个元素都: 减1后小于等于5
0x0000000000401123 <+47>: call 0x40143a <explode_bomb>
0x0000000000401128 <+52>: add r12d,0x1
0x000000000040112c <+56>: cmp r12d,0x6
0x0000000000401130 <+60>: je 0x401153 <phase_6+95>
---------------------------- 内层 for
0x0000000000401132 <+62>: mov ebx,r12d
0x0000000000401135 <+65>: movsxd rax,ebx
0x0000000000401138 <+68>: mov eax,DWORD PTR [rsp+rax*4]
0x000000000040113b <+71>: cmp DWORD PTR [rbp+0x0],eax
0x000000000040113e <+74>: jne 0x401145 <phase_6+81>
0x0000000000401140 <+76>: call 0x40143a <explode_bomb>
0x0000000000401145 <+81>: add ebx,0x1
0x0000000000401148 <+84>: cmp ebx,0x5
0x000000000040114b <+87>: jle 0x401135 <phase_6+65>
0x000000000040114d <+89>: add r13,0x4
0x0000000000401151 <+93>: jmp 0x401114 <phase_6+32>
----------------------------
0x0000000000401153 <+95>: lea rsi,[rsp+0x18] ; 输入的结尾\0
0x0000000000401158 <+100>: mov rax,r14
0x000000000040115b <+103>: mov ecx,0x7
0x0000000000401160 <+108>: mov edx,ecx
0x0000000000401162 <+110>: sub edx,DWORD PTR [rax]
0x0000000000401164 <+112>: mov DWORD PTR [rax],edx
0x0000000000401166 <+114>: add rax,0x4
0x000000000040116a <+118>: cmp rax,rsi
0x000000000040116d <+121>: jne 0x401160 <phase_6+108>
0x000000000040116f <+123>: mov esi,0x0
0x0000000000401174 <+128>: jmp 0x401197 <phase_6+163>
----------------------------
0x0000000000401176 <+130>: mov rdx,QWORD PTR [rdx+0x8]
0x000000000040117a <+134>: add eax,0x1
0x000000000040117d <+137>: cmp eax,ecx
0x000000000040117f <+139>: jne 0x401176 <phase_6+130>
0x0000000000401181 <+141>: jmp 0x401188 <phase_6+148>
0x0000000000401183 <+143>: mov edx,0x6032d0
0x0000000000401188 <+148>: mov QWORD PTR [rsp+rsi*2+0x20],rdx
0x000000000040118d <+153>: add rsi,0x4
0x0000000000401191 <+157>: cmp rsi,0x18
0x0000000000401195 <+161>: je 0x4011ab <phase_6+183>
----------------------------
0x0000000000401197 <+163>: mov ecx,DWORD PTR [rsp+rsi*1]
0x000000000040119a <+166>: cmp ecx,0x1
0x000000000040119d <+169>: jle 0x401183 <phase_6+143>
0x000000000040119f <+171>: mov eax,0x1
0x00000000004011a4 <+176>: mov edx,0x6032d0
0x00000000004011a9 <+181>: jmp 0x401176 <phase_6+130>

0x00000000004011ab <+183>: mov rbx,QWORD PTR [rsp+0x20]
0x00000000004011b0 <+188>: lea rax,[rsp+0x28]
0x00000000004011b5 <+193>: lea rsi,[rsp+0x50]
0x00000000004011ba <+198>: mov rcx,rbx
0x00000000004011bd <+201>: mov rdx,QWORD PTR [rax]
0x00000000004011c0 <+204>: mov QWORD PTR [rcx+0x8],rdx
0x00000000004011c4 <+208>: add rax,0x8
0x00000000004011c8 <+212>: cmp rax,rsi
0x00000000004011cb <+215>: je 0x4011d2 <phase_6+222>
0x00000000004011cd <+217>: mov rcx,rdx
0x00000000004011d0 <+220>: jmp 0x4011bd <phase_6+201>
0x00000000004011d2 <+222>: mov QWORD PTR [rdx+0x8],0x0

0x00000000004011da <+230>: mov ebp,0x5
0x00000000004011df <+235>: mov rax,QWORD PTR [rbx+0x8]
0x00000000004011e3 <+239>: mov eax,DWORD PTR [rax]
0x00000000004011e5 <+241>: cmp DWORD PTR [rbx],eax
0x00000000004011e7 <+243>: jge 0x4011ee <phase_6+250>
0x00000000004011e9 <+245>: call 0x40143a <explode_bomb>
0x00000000004011ee <+250>: mov rbx,QWORD PTR [rbx+0x8]
0x00000000004011f2 <+254>: sub ebp,0x1
0x00000000004011f5 <+257>: jne 0x4011df <phase_6+235>
0x00000000004011f7 <+259>: add rsp,0x50
0x00000000004011fb <+263>: pop rbx
0x00000000004011fc <+264>: pop rbp
0x00000000004011fd <+265>: pop r12
0x00000000004011ff <+267>: pop r13
0x0000000000401201 <+269>: pop r14
0x0000000000401203 <+271>: ret
End of assembler dump.

太复杂了,化整为零,一步步来

Part 6.1 预检测

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
   0x00000000004010f4 <+0>:     push   r14
0x00000000004010f6 <+2>: push r13
0x00000000004010f8 <+4>: push r12
0x00000000004010fa <+6>: push rbp
0x00000000004010fb <+7>: push rbx
0x00000000004010fc <+8>: sub rsp,0x50
0x0000000000401100 <+12>: mov r13,rsp
0x0000000000401103 <+15>: mov rsi,rsp
0x0000000000401106 <+18>: call 0x40145c <read_six_numbers> ; 6个输入数字读在rsp, rsi在进入read函数后被改
---------------------------- 外层for, 索引为r12
0x000000000040110b <+23>: mov r14,rsp
0x000000000040110e <+26>: mov r12d,0x0 ; r12初始化为0

0x0000000000401114 <+32>: mov rbp,r13
0x0000000000401117 <+35>: mov eax,DWORD PTR [r13+0x0] ; rbp,r14,r13都等于rsp
0x000000000040111b <+39>: sub eax,0x1
0x000000000040111e <+42>: cmp eax,0x5
0x0000000000401121 <+45>: jbe 0x401128 <phase_6+52> ; 6个元素都: -1后小于等于5
0x0000000000401123 <+47>: call 0x40143a <explode_bomb>
0x0000000000401128 <+52>: add r12d,0x1
0x000000000040112c <+56>: cmp r12d,0x6
0x0000000000401130 <+60>: je 0x401153 <phase_6+95>
---------------------------- 内层for, 索引为ebx
0x0000000000401132 <+62>: mov ebx,r12d

0x0000000000401135 <+65>: movsxd rax,ebx ; rax = r12 = i
0x0000000000401138 <+68>: mov eax,DWORD PTR [rsp+rax*4]
0x000000000040113b <+71>: cmp DWORD PTR [rbp+0x0],eax ; rbp指向rsp, 输入第1个元素
0x000000000040113e <+74>: jne 0x401145 <phase_6+81>
0x0000000000401140 <+76>: call 0x40143a <explode_bomb>
0x0000000000401145 <+81>: add ebx,0x1
0x0000000000401148 <+84>: cmp ebx,0x5
0x000000000040114b <+87>: jle 0x401135 <phase_6+65>
0x000000000040114d <+89>: add r13,0x4
0x0000000000401151 <+93>: jmp 0x401114 <phase_6+32>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
__int64 __fastcall phase_6(__int64 a1)
{
int *v1; // r13
int v2; // er12
int v3; // ebx
char *v4; // rax
unsigned __int64 i; // rsi
_QWORD *v6; // rdx
int v7; // eax
int v8; // ecx
__int64 v9; // rbx
char *v10; // rax
__int64 j; // rcx
__int64 v12; // rdx
int v13; // ebp
__int64 result; // rax
int v15[6]; // [rsp+0h] [rbp-78h] BYREF
char v16; // [rsp+18h] [rbp-60h] BYREF 18h就是24l,也就是输入字符串结尾\0的位置
__int64 v17; // [rsp+20h] [rbp-58h]
char v18; // [rsp+28h] [rbp-50h] BYREF
char v19[40]; // [rsp+50h] [rbp-28h] BYREF

v1 = v15;
read_six_numbers(a1, (__int64)v15);
v2 = 0;
while ( 1 )
{
if ( (unsigned int)(*v1 - 1) > 5 )
explode_bomb(a1, (const char *)v15);
if ( ++v2 == 6 )
break;
v3 = v2;
do
{
if ( *v1 == v15[v3] )
explode_bomb(a1, (const char *)v15);
++v3;
}
while ( v3 <= 5 );
++v1;
}

其实就是等价于:

1
2
3
4
5
6
7
for  (int i = 0; i < 6; i++) {
if (arr[i] - 1 > 5) bomb()
for (int j = i+1; j <= 5; j++) {
if(arr[j] == arr[i]) bomb()
}
}
// 也就是需要输入的6个数字都不大于6,且互异

Part 6.2 七减处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
 ---------------------------- for: 使用rax作为循环变量,初始化为r14=rsp
0x0000000000401153 <+95>: lea rsi,[rsp+0x18] ; 输入的结尾\0
0x0000000000401158 <+100>: mov rax,r14 ; r14初始时指向rsp, 输入的首地址
0x000000000040115b <+103>: mov ecx,0x7

0x0000000000401160 <+108>: mov edx,ecx
0x0000000000401162 <+110>: sub edx,DWORD PTR [rax]
0x0000000000401164 <+112>: mov DWORD PTR [rax],edx
0x0000000000401166 <+114>: add rax,0x4
0x000000000040116a <+118>: cmp rax,rsi
0x000000000040116d <+121>: jne 0x401160 <phase_6+108>

0x000000000040116f <+123>: mov esi,0x0
0x0000000000401174 <+128>: jmp 0x401197 <phase_6+163>

程序目的是,原地修改数组:元素值 = 7 - 元素值。

1
2
3
for (i = 0; i < 6; ++i)
arr[i] = 7 - arr[i]
// 下面将转变后的arr数组称呼为narr

Part 6.3 根据输入narr[i]将node[narr[i]]的地址复制到stack[nnode[i]]

程序入口为<phase_6+163>

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
---------------------------- 内层 for eax: 1 ~ rcx-1, rcx = narr[i]
0x0000000000401176 <+130>: mov rdx,QWORD PTR [rdx+0x8] ; node[i] = node[i + 1], 一次移动4个字(1个node),注意这里只移动rdx,不改变node
0x000000000040117a <+134>: add eax,0x1
0x000000000040117d <+137>: cmp eax,ecx
0x000000000040117f <+139>: jne 0x401176 <phase_6+130>
0x0000000000401181 <+141>: jmp 0x401188 <phase_6+148>

0x0000000000401183 <+143>: mov edx,0x6032d0

0x0000000000401188 <+148>: mov QWORD PTR [rsp+rsi*2+0x20],rdx ; 前面的rsp~rsp+0x20放输入(6B)和结束标志, 这里开始放node, 一次操作8字节(QWORD)
0x000000000040118d <+153>: add rsi,0x4 ; 1个node是4B ; 对narr中的每个数字
0x0000000000401191 <+157>: cmp rsi,0x18
0x0000000000401195 <+161>: je 0x4011ab <phase_6+183>

---------------------------- 外层 for rsi <- i, i: 0 ~ 5
0x0000000000401197 <+163>: mov ecx,DWORD PTR [rsp+rsi*1] ; rsi在jmp前初始化为0, 在+153-+161进行变动和判断
0x000000000040119a <+166>: cmp ecx,0x1 ; 判断数组中每个数是否小于等于1
0x000000000040119d <+169>: jle 0x401183 <phase_6+143>
0x000000000040119f <+171>: mov eax,0x1
0x00000000004011a4 <+176>: mov edx,0x6032d0 ; rdx指向node1
0x00000000004011a9 <+181>: jmp 0x401176 <phase_6+130>

等价代码:

1
2
3
for (i = 0; i < 6; ++i)
stack[nnode[i]] = node[narr[i]] // nnode表示栈上放node的地址,其位置与i相关,值由narr[i]确定
// 栈上存放的是node的地址(1qword = 64b),不是完整的node

+176处有个奇怪的地址,可以往下按16进制格式查30个word的数据,如下:

1
2
3
4
5
6
7
8
9
pwndbg> x/30wx 0x6032d0
0x6032d0 <node1>: 0x0000014c 0x00000001 0x006032e0 0x00000000 ; 0x006032e0就是下一个node的地址
0x6032e0 <node2>: 0x000000a8 0x00000002 0x006032f0 0x00000000
0x6032f0 <node3>: 0x0000039c 0x00000003 0x00603300 0x00000000
0x603300 <node4>: 0x000002b3 0x00000004 0x00603310 0x00000000
0x603310 <node5>: 0x000001dd 0x00000005 0x00603320 0x00000000
0x603320 <node6>: 0x000001bb 0x00000006 0x00000000 0x00000000
0x603330: 0x00000000 0x00000000 0x00000000 0x00000000
0x603340 <host_table>: 0x00402629 0x00000000

可以猜测使用到了链表结构,这个链表使用的结构体类似于:

1
2
3
4
5
struct {
int sth; // 某四字节内容,关于其用途先按下不表,后面会看到
int input; // 与我们的输入有关
node* next; // 下一个node地址
}

Part 6.4

1
2
3
4
5
6
7
8
9
10
11
12
13
0x00000000004011ab <+183>:   mov    rbx,QWORD PTR [rsp+0x20] 
0x00000000004011b0 <+188>: lea rax,[rsp+0x28]
0x00000000004011b5 <+193>: lea rsi,[rsp+0x50] ; 结尾
0x00000000004011ba <+198>: mov rcx,rbx

0x00000000004011bd <+201>: mov rdx,QWORD PTR [rax]
0x00000000004011c0 <+204>: mov QWORD PTR [rcx+0x8],rdx
0x00000000004011c4 <+208>: add rax,0x8
0x00000000004011c8 <+212>: cmp rax,rsi
0x00000000004011cb <+215>: je 0x4011d2 <phase_6+222>
0x00000000004011cd <+217>: mov rcx,rdx
0x00000000004011d0 <+220>: jmp 0x4011bd <phase_6+201>
0x00000000004011d2 <+222>: mov QWORD PTR [rdx+0x8],0x0 ; 设置链表结尾为null

运行时信息:可以看到确实将node移动到了rsp+20的位置:由于输入为1 2 3 4 5 6,用7减后得6 5 4 3 2 1, 分别对应node6 - node1。若输入改为4 3 2 1 5 6,分别为node 3 4 5 6 2 1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
0x4011ab <phase_6+183>    mov    rbx, qword ptr [rsp + 0x20]
0x4011b0 <phase_6+188> lea rax, [rsp + 0x28]
0x4011b5 <phase_6+193> lea rsi, [rsp + 0x50]
0x4011ba <phase_6+198> mov rcx, rbx
0x4011bd <phase_6+201> mov rdx, qword ptr [rax]
0x4011c0 <phase_6+204> mov qword ptr [rcx + 8], rdx
────────────────────────────────────────────────────────────────────────────────────────────[ STACK ]────────────────────────────────────────────────────────────────────────────────────────────
00:0000│ r14 rsp 0x7fffffffdc00 ◂— 0x500000006
01:00080x7fffffffdc08 ◂— 0x300000004
02:0010│ r13-4 rbp-4 0x7fffffffdc10 ◂— 0x100000002
03:00180x7fffffffdc18 ◂— 0x0
04:00200x7fffffffdc20 —▸ 0x603320 (node6) ◂— 0x6000001bb
05:00280x7fffffffdc28 —▸ 0x603310 (node5) ◂— 0x5000001dd
06:00300x7fffffffdc30 —▸ 0x603300 (node4) ◂— 0x4000002b3
07:00380x7fffffffdc38 —▸ 0x6032f0 (node3) ◂— 0x30000039c

Untitled

1
2
3
4
for (i = 1; i < 6; ++i)
(*stack[nnode[i]]).next = *stack[nnode[i + 1]]

node[6].next = nullptr

注意搬到栈的数据顺序不一定是654321,这样一个个往后又重新排序好了,我们可以猜测其目的是要进行遍历。

Part 6.5 遍历检查链表中值大小

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
---------------------------- for ebp: 5~1
0x00000000004011da <+230>: mov ebp,0x5

0x00000000004011df <+235>: mov rax,QWORD PTR [rbx+0x8] ; rbx指向头指针
0x00000000004011e3 <+239>: mov eax,DWORD PTR [rax] ; 移动单字节: 是node的第一个字段
0x00000000004011e5 <+241>: cmp DWORD PTR [rbx],eax ; 比的也是node.sth
0x00000000004011e7 <+243>: jge 0x4011ee <phase_6+250>
0x00000000004011e9 <+245>: call 0x40143a <explode_bomb>]

0x00000000004011ee <+250>: mov rbx,QWORD PTR [rbx+0x8] ; 访问next字段
0x00000000004011f2 <+254>: sub ebp,0x1
0x00000000004011f5 <+257>: jne 0x4011df <phase_6+235>
0x00000000004011f7 <+259>: add rsp,0x50
0x00000000004011fb <+263>: pop rbx
0x00000000004011fc <+264>: pop rbp
0x00000000004011fd <+265>: pop r12
0x00000000004011ff <+267>: pop r13
0x0000000000401201 <+269>: pop r14
0x0000000000401203 <+271>: ret

下面果然对链表进行了遍历,看每个元素是否都大于后一个元素,若否则引爆炸弹。

1
2
3
4
5
6
7
8
idx = 0
for (i = 5; i > 0; --i) {
if (node[idx].sth > node[idx+1].sth) {
idx++;
continue;
}
bomb();
}

使用x/30wx 0x6032d0查看nodes的sth值大小,确认从大到小的顺序为:node 3 4 5 6 1 2,对应输入为 4 3 2 1 6 5

secret_phase

Part s.1 phase_defused

在读入6条字符串后检查第4个字符串是否由3部分组成,第3部分是"DrEvil"

反汇编:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
unsigned __int64 phase_defused()
{
char v1; // [rsp+8h] [rbp-70h] BYREF
char v2; // [rsp+Ch] [rbp-6Ch] BYREF
char v3[88]; // [rsp+10h] [rbp-68h] BYREF
unsigned __int64 v4; // [rsp+68h] [rbp-10h]

v4 = __readfsqword(0x28u);
if ( num_input_strings == 6 )
{
if ( (unsigned int)__isoc99_sscanf(&unk_603870, "%d %d %s", &v1, &v2, v3) == 3
&& !(unsigned int)strings_not_equal(v3, "DrEvil") )
{
puts("Curses, you've found the secret phase!");
puts("But finding it and solving it are quite different...");
secret_phase();
}
puts("Congratulations! You've defused the bomb!");
}
return __readfsqword(0x28u) ^ v4;
}

反编译:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
pwndbg> disassemble phase_defused
Dump of assembler code for function phase_defused:
0x00000000004015c4 <+0>: sub rsp,0x78
0x00000000004015c8 <+4>: mov rax,QWORD PTR fs:0x28
0x00000000004015d1 <+13>: mov QWORD PTR [rsp+0x68],rax
0x00000000004015d6 <+18>: xor eax,eax
0x00000000004015d8 <+20>: cmp DWORD PTR [rip+0x202181],0x6 # 0x603760 <num_input_strings>
0x00000000004015df <+27>: jne 0x40163f <phase_defused+123>
0x00000000004015e1 <+29>: lea r8,[rsp+0x10]
0x00000000004015e6 <+34>: lea rcx,[rsp+0xc]
0x00000000004015eb <+39>: lea rdx,[rsp+0x8]
0x00000000004015f0 <+44>: mov esi,0x402619
0x00000000004015f5 <+49>: mov edi,0x603870
0x00000000004015fa <+54>: call 0x400bf0 <__isoc99_sscanf@plt>
0x00000000004015ff <+59>: cmp eax,0x3
0x0000000000401602 <+62>: jne 0x401635 <phase_defused+113>
0x0000000000401604 <+64>: mov esi,0x402622
0x0000000000401609 <+69>: lea rdi,[rsp+0x10]
0x000000000040160e <+74>: call 0x401338 <strings_not_equal>
0x0000000000401613 <+79>: test eax,eax
0x0000000000401615 <+81>: jne 0x401635 <phase_defused+113>
0x0000000000401617 <+83>: mov edi,0x4024f8
0x000000000040161c <+88>: call 0x400b10 <puts@plt>
0x0000000000401621 <+93>: mov edi,0x402520
0x0000000000401626 <+98>: call 0x400b10 <puts@plt>
0x000000000040162b <+103>: mov eax,0x0
0x0000000000401630 <+108>: call 0x401242 <secret_phase>
0x0000000000401635 <+113>: mov edi,0x402558
0x000000000040163a <+118>: call 0x400b10 <puts@plt>
0x000000000040163f <+123>: mov rax,QWORD PTR [rsp+0x68]
0x0000000000401644 <+128>: xor rax,QWORD PTR fs:0x28
0x000000000040164d <+137>: je 0x401654 <phase_defused+144>
0x000000000040164f <+139>: call 0x400b30 <__stack_chk_fail@plt>
0x0000000000401654 <+144>: add rsp,0x78
0x0000000000401658 <+148>: ret
End of assembler dump.

Untitled

Part s.2 secret_phase

使用strtol()解析输入,输入第一部分的数字不能大于1000,且传到fun7()后返回值必须为2.

反编译:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
__int64 secret_phase()
{
const char *v0; // rdi
unsigned int v1; // ebx

v0 = read_line();
v1 = strtol(v0, 0LL, 10);
if ( v1 - 1 > 1000 )
((void (*)(const char *, const char *, ...))explode_bomb)(v0, 0LL);
if ( (unsigned int)fun7((__int64)&n1, v1) != 2 )
((void (*)(void *, const char *, ...))explode_bomb)(&n1, (const char *)v1);
puts("Wow! You've defused the secret stage!");
return phase_defused();
}

反汇编:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
dump of assembler code for function secret_phase:
0x0000000000401242 <+0>: push rbx
0x0000000000401243 <+1>: call 0x40149e <read_line>
0x0000000000401248 <+6>: mov edx,0xa
0x000000000040124d <+11>: mov esi,0x0
0x0000000000401252 <+16>: mov rdi,rax
0x0000000000401255 <+19>: call 0x400bd0 <strtol@plt>
0x000000000040125a <+24>: mov rbx,rax
0x000000000040125d <+27>: lea eax,[rax-0x1]
0x0000000000401260 <+30>: cmp eax,0x3e8
0x0000000000401265 <+35>: jbe 0x40126c <secret_phase+42>
0x0000000000401267 <+37>: call 0x40143a <explode_bomb>
0x000000000040126c <+42>: mov esi,ebx
0x000000000040126e <+44>: mov edi,0x6030f0
0x0000000000401273 <+49>: call 0x401204 <fun7>
0x0000000000401278 <+54>: cmp eax,0x2
0x000000000040127b <+57>: je 0x401282 <secret_phase+64>
0x000000000040127d <+59>: call 0x40143a <explode_bomb>
0x0000000000401282 <+64>: mov edi,0x402438
0x0000000000401287 <+69>: call 0x400b10 <puts@plt>
0x000000000040128c <+74>: call 0x4015c4 <phase_defused>
0x0000000000401291 <+79>: pop rbx
0x0000000000401292 <+80>: ret
End of assembler dump.

Part s.3 func7

最后一步的难点是看出来这是在二叉树上进行运算。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
0x401273 <secret_phase+49>    call   fun7                      <fun7>
rdi: 0x6030f0 (n1) ◂— 0x24 /* '$' */
rsi: 0x29a
rdx: 0x0
rcx: 0x603963 (input_strings+483) —▸ 0x616b6a ◂— 0x0

pwndbg> disassemb fun7
Dump of assembler code for function fun7:
0x0000000000401204 <+0>: sub rsp,0x8
0x0000000000401208 <+4>: test rdi,rdi
0x000000000040120b <+7>: je 0x401238 <fun7+52>
0x000000000040120d <+9>: mov edx,DWORD PTR [rdi]
0x000000000040120f <+11>: cmp edx,esi
0x0000000000401211 <+13>: jle 0x401220 <fun7+28>
0x0000000000401213 <+15>: mov rdi,QWORD PTR [rdi+0x8]
0x0000000000401217 <+19>: call 0x401204 <fun7>
0x000000000040121c <+24>: add eax,eax
0x000000000040121e <+26>: jmp 0x40123d <fun7+57>

0x0000000000401220 <+28>: mov eax,0x0
0x0000000000401225 <+33>: cmp edx,esi
0x0000000000401227 <+35>: je 0x40123d <fun7+57>

0x0000000000401229 <+37>: mov rdi,QWORD PTR [rdi+0x10]
0x000000000040122d <+41>: call 0x401204 <fun7>
0x0000000000401232 <+46>: lea eax,[rax+rax*1+0x1]
0x0000000000401236 <+50>: jmp 0x40123d <fun7+57>
---------------------------- 如果rdi == 0 就返回0xffffffff
0x0000000000401238 <+52>: mov eax,0xffffffff

0x000000000040123d <+57>: add rsp,0x8
0x0000000000401241 <+61>: ret
End of assembler dump

secret中将0x6030f0传给rdi,rdi为函数的第一个参数,可以查看下0x6030f0的内容:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
gdb-peda$ x/80gx 0x6030f0
0x6030f0 <n1>: 0x0000000000000024 0x0000000000603110
0x603100 <n1+16>: 0x0000000000603130 0x00000000 00000000
0x603110 <n21>: 0x0000000000000008 0x0000000000603190
0x603120 <n21+16>: 0x0000000000603150 0x0000000000000000
0x603130 <n22>: 0x0000000000000032 0x0000000000603170
0x603140 <n22+16>: 0x00000000006031b0 0x0000000000000000
0x603150 <n32>: 0x0000000000000016 0x0000000000603270
0x603160 <n32+16>: 0x0000000000603230 0x0000000000000000
0x603170 <n33>: 0x000000000000002d 0x00000000006031d0
0x603180 <n33+16>: 0x0000000000603290 0x0000000000000000
0x603190 <n31>: 0x0000000000000006 0x00000000006031f0
0x6031a0 <n31+16>: 0x0000000000603250 0x0000000000000000
0x6031b0 <n34>: 0x000000000000006b 0x0000000000603210
0x6031c0 <n34+16>: 0x00000000006032b0 0x0000000000000000
0x6031d0 <n45>: 0x0000000000000028 0x0000000000000000
0x6031e0 <n45+16>: 0x0000000000000000 0x0000000000000000
0x6031f0 <n41>: 0x0000000000000001 0x0000000000000000
0x603200 <n41+16>: 0x0000000000000000 0x0000000000000000
0x603210 <n47>: 0x0000000000000063 0x0000000000000000
0x603220 <n47+16>: 0x0000000000000000 0x0000000000000000
0x603230 <n44>: 0x0000000000000023 0x0000000000000000
0x603240 <n44+16>: 0x0000000000000000 0x0000000000000000
0x603250 <n42>: 0x0000000000000007 0x0000000000000000
0x603260 <n42+16>: 0x0000000000000000 0x0000000000000000
0x603270 <n43>: 0x0000000000000014 0x0000000000000000
0x603280 <n43+16>: 0x0000000000000000 0x0000000000000000
0x603290 <n46>: 0x000000000000002f 0x0000000000000000
0x6032a0 <n46+16>: 0x0000000000000000 0x0000000000000000
0x6032b0 <n48>: 0x00000000000003e9 0x0000000000000000
0x6032c0 <n48+16>: 0x0000000000000000 0x0000000000000000
0x6032d0 <node1>: 0x000000010000014c 0x00000000006032e0
0x6032e0 <node2>: 0x00000002000000a8 0x0000000000000000
0x6032f0 <node3>: 0x000000030000039c 0x0000000000603300
0x603300 <node4>: 0x00000004000002b3 0x0000000000603310
0x603310 <node5>: 0x00000005000001dd 0x0000000000603320
0x603320 <node6>: 0x00000006000001bb 0x0000000000

可以猜测这里存放着二叉树,结构可能为:

1
2
3
4
5
struct node{
long val;
struct node *l,*r;
}
// 每个node后补了8个字节,可能是为了内存对齐

可以根据这些n画出完整的二叉树,但人工画有点麻烦,不如先思考怎样得到所需的返回值2。

IDA反编译代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
__int64 __fastcall fun7(__int64 a1, __int64 a2)
{
__int64 result; // rax

if ( !a1 )
return 0xFFFFFFFFLL;
if ( *(_DWORD *)a1 > (int)a2 )
return 2 * (unsigned int)fun7(*(_QWORD *)(a1 + 8), a2);
result = 0LL;
if ( *(_DWORD *)a1 != (_DWORD)a2 )
result = 2 * (unsigned int)fun7(*(_QWORD *)(a1 + 16), a2) + 1;
return result;
}

a1(rdi)其实是二叉树的根,a1+8对应左子树,a1+16对应右子树。

若访问到空节点,则返回值会出现0xffffffff,所以我们只能在树上(软件原因树的最右分枝未补全)的节点选择。通过代码我们知道,若从节点x出发result初始值为0,若改节点为右儿子则a=2*a+1、为左儿子则a=a*2

由于2 = 2 * 1,所以需要的节点x满足,x是其父节点y的右儿子,y是根节点a1(rsi)的左儿子。

Untitled

所以输入20(或者0x603150的左儿子)就行。

笔记

  • 不小心按ctrl-z把进程suspend了,可以使用jobs查询当前工作,然后fg N (bash)fg %N(zsh) 将任务从后台转入前台

参考资料