结构体
C语言中的结构体是一种复合数据结构,可以将不同数据类型的变量进行封装
一个顺序表结构体
#include <stdio.h>
#include <stdlib.h>
typedef struct Seqlist
{
int list[10];
int ptr;
}Seqlist;
int main(void)
{
Seqlist* p;
p = (Seqlist*)malloc(sizeof(Seqlist));
return 0;
}
反汇编如下
10: int main(void)
11: {
00E551D0 55 push ebp
00E551D1 8B EC mov ebp,esp
00E551D3 81 EC CC 00 00 00 sub esp,0CCh
00E551D9 53 push ebx
00E551DA 56 push esi
00E551DB 57 push edi
00E551DC 8D BD 34 FF FF FF lea edi,[ebp+FFFFFF34h]
00E551E2 B9 33 00 00 00 mov ecx,33h
00E551E7 B8 CC CC CC CC mov eax,0CCCCCCCCh
00E551EC F3 AB rep stos dword ptr es:[edi]
00E551EE B9 03 C0 E5 00 mov ecx,0E5C003h
00E551F3 E8 0F C1 FF FF call 00E51307
12: Seqlist* p;
13:
14: p = (Seqlist*)malloc(sizeof(Seqlist));
00E551F8 8B F4 mov esi,esp
00E551FA 6A 2C push 2Ch
00E551FC FF 15 7C B1 E5 00 call dword ptr ds:[00E5B17Ch]
00E55202 83 C4 04 add esp,4
00E55205 3B F4 cmp esi,esp
00E55207 E8 24 C0 FF FF call 00E51230
00E5520C 89 45 F8 mov dword ptr [ebp-8],eax
15:
16: return 0;
00E5520F 33 C0 xor eax,eax
我们可以看到在00A051D3,esp的地址偏移量增加了。如果main函数里只有return 0;的话地址偏移量为C0(十进制192),而这里为CC(十进制204)。偏移量增加不仅是因为为了给更多的变量分配栈空间,而且还为了栈对齐。
栈对齐是指让栈帧的地址向16字节对齐,目的是为了让计算机更好更快的去执行程序(计算机访问偶数地址比访问奇数地址速度要快)
可以试试将esp的地址偏移量与16相除,最后结果是能整除的。只要在函数中定义或声明变量都会使esp的地址偏移量增加
我们现在来看看malloc这段代码
malloc
14: p = (Seqlist*)malloc(sizeof(Seqlist));
00E551F8 8B F4 mov esi,esp
00E551FA 6A 2C push 2Ch
00E551FC FF 15 7C B1 E5 00 call dword ptr ds:[00E5B17Ch]
00E55202 83 C4 04 add esp,4
00E55205 3B F4 cmp esi,esp
00E55207 E8 24 C0 FF FF call 00E51230
00E5520C 89 45 F8 mov dword ptr [ebp-8],eax
15:
16: return 0;
00E5520F 33 C0 xor eax,eax
malloc函数的参数为变量的字节大小,很明显Seqlist结构体的大小为44个字节(10个int数组,和1个int指针),所以将44压栈。分配好空间后将空间地址给eax(具体操作在malloc函数中执行,即00E55207),eax将地址给指针p。
如果结构体中是不同数据类型的变量会怎么样?
不同数据类型的结构体
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
typedef struct Seqlist
{
char list[10];
int ptr;
_Bool empty;
}Seqlist;
int main(void)
{
Seqlist* p;
p = (Seqlist*)malloc(sizeof(Seqlist));
return 0;
}
反汇编如下
12: int main(void)
13: {
00B051D0 55 push ebp
00B051D1 8B EC mov ebp,esp
00B051D3 81 EC CC 00 00 00 sub esp,0CCh
00B051D9 53 push ebx
00B051DA 56 push esi
00B051DB 57 push edi
00B051DC 8D BD 34 FF FF FF lea edi,[ebp+FFFFFF34h]
00B051E2 B9 33 00 00 00 mov ecx,33h
00B051E7 B8 CC CC CC CC mov eax,0CCCCCCCCh
00B051EC F3 AB rep stos dword ptr es:[edi]
00B051EE B9 03 C0 B0 00 mov ecx,0B0C003h
00B051F3 E8 0F C1 FF FF call 00B01307
14: Seqlist* p;
15:
16: p = (Seqlist*)malloc(sizeof(Seqlist));
00B051F8 8B F4 mov esi,esp
00B051FA 6A 14 push 14h
00B051FC FF 15 7C B1 B0 00 call dword ptr ds:[00B0B17Ch]
00B05202 83 C4 04 add esp,4
00B05205 3B F4 cmp esi,esp
00B05207 E8 24 C0 FF FF call 00B01230
00B0520C 89 45 F8 mov dword ptr [ebp-8],eax
17:
18: return 0;
00B0520F 33 C0 xor eax,eax
乍一看结构体的字节大小应改为15,但是压入栈的字节数却是20。这是因为结构体要遵循字节对齐原则,在结构体中可以存放各种数据类型的变量,每个变量字节长度不统一,所以要将结构体中字节数较大的变量作为对齐标准,这样的话就会多出一些字节的空间。
int字节数最大,因此以它为标准,于是就有了 4 * 5 = 20 个字节。可以看到char和_Bool明显没有char数组的字节数大,那么就保留原有的字节数,后面多出来的字节就设为空。(char数组虽然看上去字节数大,但实际上就是10个变量排列在一起而已,本质上还是1)
结构体指针
链表结构体
#include <stdio.h>
#include <stdlib.h>
typedef struct LinkList {
int data;
struct LinkList* next;
}LinkList;
int main(void)
{
LinkList* L = NULL;
L = (LinkList*)malloc(sizeof(LinkList));
L->data = 1;
L->next = (LinkList*)malloc(sizeof(LinkList));
return 0;
}
反汇编如下
11: LinkList* L = NULL;
003B51F8 C7 45 F8 00 00 00 00 mov dword ptr [ebp-8],0
12:
13: L = (LinkList*)malloc(sizeof(LinkList));
003B51FF 8B F4 mov esi,esp
003B5201 6A 08 push 8
003B5203 FF 15 7C B1 3B 00 call dword ptr ds:[003BB17Ch]
003B5209 83 C4 04 add esp,4
003B520C 3B F4 cmp esi,esp
003B520E E8 1D C0 FF FF call 003B1230
003B5213 89 45 F8 mov dword ptr [ebp-8],eax ;结构体空间分配成功后将eax的值赋给L
14:
15: L->data = 1;
003B5216 8B 45 F8 mov eax,dword ptr [ebp-8]
003B5219 C7 00 01 00 00 00 mov dword ptr [eax],1
16: L->next = (LinkList*)malloc(sizeof(LinkList));
003B521F 8B F4 mov esi,esp
003B5221 6A 08 push 8
003B5223 FF 15 7C B1 3B 00 call dword ptr ds:[003BB17Ch]
003B5229 83 C4 04 add esp,4
003B522C 3B F4 cmp esi,esp
003B522E E8 FD BF FF FF call 003B1230
003B5233 8B 4D F8 mov ecx,dword ptr [ebp-8]
003B5236 89 41 04 mov dword ptr [ecx+4],eax ;下一块结构体空间分配成功后将eax赋给L->next
17:
18: return 0;
003B5239 33 C0 xor eax,eax
可以看到对结构体指针进行解引用就是结构体中的int变量,而将地址+4后就是下一块结构体空间的地址,这个地址偏移量跟结构体中的字节数有关
#include <stdio.h>
#include <stdlib.h>
typedef struct LinkList {
int data;
char ch;
short element;
struct LinkList* next;
}LinkList;
int main(void)
{
LinkList* L = NULL;
L = (LinkList*)malloc(sizeof(LinkList));
L->data = 1;
L->next = (LinkList*)malloc(sizeof(LinkList));
return 0;
}
反汇编如下
13: LinkList* L = NULL;
009B51F8 C7 45 F8 00 00 00 00 mov dword ptr [ebp-8],0
14:
15: L = (LinkList*)malloc(sizeof(LinkList));
009B51FF 8B F4 mov esi,esp
009B5201 6A 0C push 0Ch
009B5203 FF 15 7C B1 9B 00 call dword ptr ds:[009BB17Ch]
009B5209 83 C4 04 add esp,4
009B520C 3B F4 cmp esi,esp
009B520E E8 1D C0 FF FF call 009B1230
009B5213 89 45 F8 mov dword ptr [ebp-8],eax
16:
17: L->data = 1;
009B5216 8B 45 F8 mov eax,dword ptr [ebp-8]
009B5219 C7 00 01 00 00 00 mov dword ptr [eax],1
18: L->ch = 'A';
009B521F 8B 45 F8 mov eax,dword ptr [ebp-8]
009B5222 C6 40 04 41 mov byte ptr [eax+4],41h
19: L->element = 3;
009B5226 B8 03 00 00 00 mov eax,3
009B522B 8B 4D F8 mov ecx,dword ptr [ebp-8]
009B522E 66 89 41 06 mov word ptr [ecx+6],ax
20: L->next = (LinkList*)malloc(sizeof(LinkList));
009B5232 8B F4 mov esi,esp
009B5234 6A 0C push 0Ch
009B5236 FF 15 7C B1 9B 00 call dword ptr ds:[009BB17Ch]
009B523C 83 C4 04 add esp,4
009B523F 3B F4 cmp esi,esp
009B5241 E8 EA BF FF FF call 009B1230
009B5246 8B 4D F8 mov ecx,dword ptr [ebp-8]
009B5249 89 41 08 mov dword ptr [ecx+8],eax
21:
22: return 0;
009B524C 33 C0 xor eax,eax
这个算法运行时有点小问题,下面通过汇编来解决
链表
#include <stdio.h>
#include <stdlib.h>
typedef struct LinkList {
int data;
struct LinkList* next;
}LinkList;
LinkList* Init_LinkList()
{
LinkList* H = (LinkList*)malloc(sizeof(LinkList)); //头指针
if (!H)
return NULL;
else
{
H->next = NULL;
return H;
}
}
LinkList* Insert_LinkList(LinkList* H) //插入
{
int i;
int x;
if (!H)
return NULL;
LinkList* L = (LinkList*)malloc(sizeof(LinkList));
LinkList* R = H;
if (!L)
L = NULL;
else
{
for (i = 0; i < 10; i++)
{
scanf("%d", &x);
L->data = x;
L->next = R->next;
R->next = L;
R = L;
printf("%d\n", R->data);
LinkList* L = (LinkList*)malloc(sizeof(LinkList));
if (!L)
{
L = NULL;
return H;
}
}
}
return H;
}
int Find_LinkList(LinkList* H, int x) //查找
{
LinkList* R = H;
while (x != R->data)
{
R = R->next;
if (R == NULL)
break;
}
if (R == NULL)
return-1;
else
return R->data;
}
int main(void)
{
int x;
LinkList* H = Init_LinkList();
printf("请输入元素:(10次)\n");
H = Insert_LinkList(H);
putchar('\n');
printf("查找指定元素:\n");
scanf("%d", &x);
x = Find_LinkList(H, x);
if (x == -1)
printf("%d未被找到\n", x);
else
printf("%d被找到\n", x);
putchar('\n');
return 0;
}
Init_LinkList函数反汇编如下
进入Init_LinkList函数前
80: LinkList* H = Init_LinkList();
006052E8 E8 0E C2 FF FF call 006014FB
006052ED 89 45 EC mov dword ptr [ebp-14h],eax
Init_LinkList函数内
11: LinkList* H = (LinkList*)malloc(sizeof(LinkList)); //头指针
001C51F8 8B F4 mov esi,esp
001C51FA 6A 08 push 8
001C51FC FF 15 7C B1 1C 00 call dword ptr ds:[001CB17Ch]
001C5202 83 C4 04 add esp,4
001C5205 3B F4 cmp esi,esp
001C5207 E8 24 C0 FF FF call 001C1230
001C520C 89 45 F8 mov dword ptr [ebp-8],eax
12:
13: if (!H)
001C520F 83 7D F8 00 cmp dword ptr [ebp-8],0
001C5213 75 06 jne 001C521B
14: return NULL;
001C5215 33 C0 xor eax,eax
001C5217 EB 0F jmp 001C5228
001C5219 EB 0D jmp 001C5228
15: else
16: {
17: H->next = NULL;
001C521B 8B 45 F8 mov eax,dword ptr [ebp-8]
001C521E C7 40 04 00 00 00 00 mov dword ptr [eax+4],0
18: return H;
001C5225 8B 45 F8 mov eax,dword ptr [ebp-8]
这里的 if(!H) 中的 ! 运算,在汇编中是将变量与0比较,若等于0则执行return NULL;否则执行else。看来这个建立头指针的算法没有问题。下面来看看插入算法
进入Insert_LinkList函数前
83: H = Insert_LinkList(H);
009652FD 8B 45 EC mov eax,dword ptr [ebp-14h]
00965300 50 push eax
00965301 E8 F0 C1 FF FF call 009614F6
00965306 83 C4 04 add esp,4
00965309 89 45 EC mov dword ptr [ebp-14h],eax
这里的eax因为在执行printf函数时值被修改了,因此这里将H的地址赋给了eax。
Insert_LinkList函数内
24: int i;
25: int x;
26:
27: if (!H)
001C5098 83 7D 08 00 cmp dword ptr [ebp+8],0
001C509C 75 07 jne 001C50A5
28: return NULL;
001C509E 33 C0 xor eax,eax
001C50A0 E9 BF 00 00 00 jmp 001C5164
29:
30: LinkList* L = (LinkList*)malloc(sizeof(LinkList));
001C50A5 8B F4 mov esi,esp
001C50A7 6A 08 push 8
001C50A9 FF 15 7C B1 1C 00 call dword ptr ds:[001CB17Ch]
001C50AF 83 C4 04 add esp,4
001C50B2 3B F4 cmp esi,esp
001C50B4 E8 77 C1 FF FF call 001C1230
001C50B9 89 45 E0 mov dword ptr [ebp-20h],eax
31: LinkList* R = H;
001C50BC 8B 45 08 mov eax,dword ptr [ebp+8]
001C50BF 89 45 D4 mov dword ptr [ebp-2Ch],eax
32:
33: if (!L)
001C50C2 83 7D E0 00 cmp dword ptr [ebp-20h],0
001C50C6 75 0C jne 001C50D4
34: L = NULL;
001C50C8 C7 45 E0 00 00 00 00 mov dword ptr [ebp-20h],0
001C50CF E9 8D 00 00 00 jmp 001C5161
35:
36: else
37: {
38: for (i = 0; i < 10; i++)
001C50D4 C7 45 F8 00 00 00 00 mov dword ptr [ebp-8],0
001C50DB EB 09 jmp 001C50E6
001C50DD 8B 45 F8 mov eax,dword ptr [ebp-8]
001C50E0 83 C0 01 add eax,1
001C50E3 89 45 F8 mov dword ptr [ebp-8],eax
001C50E6 83 7D F8 0A cmp dword ptr [ebp-8],0Ah
001C50EA 7D 75 jge 001C5161
39: {
40: scanf("%d", &x);
001C50EC 8D 45 EC lea eax,[ebp-14h]
001C50EF 50 push eax
001C50F0 68 CC 7B 1C 00 push 1C7BCCh
001C50F5 E8 F7 C3 FF FF call 001C14F1
001C50FA 83 C4 08 add esp,8
41: L->data = x;
001C50FD 8B 45 E0 mov eax,dword ptr [ebp-20h]
001C5100 8B 4D EC mov ecx,dword ptr [ebp-14h]
001C5103 89 08 mov dword ptr [eax],ecx
42: L->next = R->next;
001C5105 8B 45 E0 mov eax,dword ptr [ebp-20h]
001C5108 8B 4D D4 mov ecx,dword ptr [ebp-2Ch]
001C510B 8B 51 04 mov edx,dword ptr [ecx+4]
001C510E 89 50 04 mov dword ptr [eax+4],edx
43: R->next = L;
001C5111 8B 45 D4 mov eax,dword ptr [ebp-2Ch]
001C5114 8B 4D E0 mov ecx,dword ptr [ebp-20h]
001C5117 89 48 04 mov dword ptr [eax+4],ecx
44: R = L;
001C511A 8B 45 E0 mov eax,dword ptr [ebp-20h]
001C511D 89 45 D4 mov dword ptr [ebp-2Ch],eax
45: printf("%d\n", R->data);
001C5120 8B 45 D4 mov eax,dword ptr [ebp-2Ch]
001C5123 8B 08 mov ecx,dword ptr [eax]
001C5125 51 push ecx
001C5126 68 D0 7B 1C 00 push 1C7BD0h
001C512B E8 B7 C3 FF FF call 001C14E7
001C5130 83 C4 08 add esp,8
46: LinkList* L = (LinkList*)malloc(sizeof(LinkList));
001C5133 8B F4 mov esi,esp
001C5135 6A 08 push 8
001C5137 FF 15 7C B1 1C 00 call dword ptr ds:[001CB17Ch]
001C513D 83 C4 04 add esp,4
001C5140 3B F4 cmp esi,esp
001C5142 E8 E9 C0 FF FF call 001C1230
001C5147 89 45 C8 mov dword ptr [ebp-38h],eax
47:
48: if (!L)
001C514A 83 7D C8 00 cmp dword ptr [ebp-38h],0
001C514E 75 0C jne 001C515C
49: {
50: L = NULL;
001C5150 C7 45 C8 00 00 00 00 mov dword ptr [ebp-38h],0
51: return H;
001C5157 8B 45 08 mov eax,dword ptr [ebp+8]
001C515A EB 08 jmp 001C5164
52: }
53: }
001C515C E9 7C FF FF FF jmp 001C50DD
54:
55: }
56: return H;
001C5161 8B 45 08 mov eax,dword ptr [ebp+8]
我们一步一步看,先从判断语句开始
27: if (!H)
001C5098 83 7D 08 00 cmp dword ptr [ebp+8],0
001C509C 75 07 jne 001C50A5
28: return NULL;
001C509E 33 C0 xor eax,eax
001C50A0 E9 BF 00 00 00 jmp 001C5164 jne 001C50A5
这里的dword ptr [ebp + 8]就是刚才压栈的eax,因为call指令在执行时会将call下面的指令的地址压栈,而又因为进入函数后ebp被压栈,所以是ebp + 8
30: LinkList* L = (LinkList*)malloc(sizeof(LinkList));
001C50A5 8B F4 mov esi,esp
001C50A7 6A 08 push 8
001C50A9 FF 15 7C B1 1C 00 call dword ptr ds:[001CB17Ch]
001C50AF 83 C4 04 add esp,4
001C50B2 3B F4 cmp esi,esp
001C50B4 E8 77 C1 FF FF call 001C1230
001C50B9 89 45 E0 mov dword ptr [ebp-20h],eax
31: LinkList* R = H;
001C50BC 8B 45 08 mov eax,dword ptr [ebp+8]
001C50BF 89 45 D4 mov dword ptr [ebp-2Ch],eax
这里没什么特别要说的,就是分配空间。下面这段很重要,由于这部分比较绕,我将符号名打开
36: else
37: {
38: for (i = 0; i < 10; i++)
001C50D4 C7 45 F8 00 00 00 00 mov dword ptr [ebp-8],0
001C50DB EB 09 jmp 001C50E6
001C50DD 8B 45 F8 mov eax,dword ptr [ebp-8]
001C50E0 83 C0 01 add eax,1
001C50E3 89 45 F8 mov dword ptr [ebp-8],eax
001C50E6 83 7D F8 0A cmp dword ptr [ebp-8],0Ah
001C50EA 7D 75 jge 001C5161
39: {
40: scanf("%d", &x);
001C50EC 8D 45 EC lea eax,[ebp-14h]
001C50EF 50 push eax
001C50F0 68 CC 7B 1C 00 push 1C7BCCh
001C50F5 E8 F7 C3 FF FF call 001C14F1
001C50FA 83 C4 08 add esp,8
41: L->data = x;
001C50FD 8B 45 E0 mov eax,dword ptr [ebp-20h]
001C5100 8B 4D EC mov ecx,dword ptr [ebp-14h]
001C5103 89 08 mov dword ptr [eax],ecx
42: L->next = R->next ;第一次循环时R仍然指向H,而此时H的next为NULL
001C5105 8B 45 E0 mov eax,dword ptr [ebp-20h]
001C5108 8B 4D D4 mov ecx,dword ptr [ebp-2Ch]
001C510B 8B 51 04 mov edx,dword ptr [ecx+4]
001C510E 89 50 04 mov dword ptr [eax+4],edx
43: R->next = L;
001C5111 8B 45 D4 mov eax,dword ptr [ebp-2Ch]
001C5114 8B 4D E0 mov ecx,dword ptr [ebp-20h]
001C5117 89 48 04 mov dword ptr [eax+4],ecx
44: R = L;
001C511A 8B 45 E0 mov eax,dword ptr [ebp-20h]
001C511D 89 45 D4 mov dword ptr [ebp-2Ch],eax
45: printf("%d\n", R->data);
001C5120 8B 45 D4 mov eax,dword ptr [ebp-2Ch]
001C5123 8B 08 mov ecx,dword ptr [eax]
001C5125 51 push ecx
001C5126 68 D0 7B 1C 00 push 1C7BD0h
001C512B E8 B7 C3 FF FF call 001C14E7
001C5130 83 C4 08 add esp,8
这段代码也没什么特别要说的,只是有点混乱而已,细心点就能看明白。
46: LinkList* L = (LinkList*)malloc(sizeof(LinkList));
001C5133 8B F4 mov esi,esp
001C5135 6A 08 push 8
001C5137 FF 15 7C B1 1C 00 call dword ptr ds:[001CB17Ch]
001C513D 83 C4 04 add esp,4
001C5140 3B F4 cmp esi,esp
001C5142 E8 E9 C0 FF FF call 001C1230
001C5147 89 45 C8 mov dword ptr [ebp-38h],eax
问题的根源找到了,这里的L已经变了,原来的L应该为dowrd ptr [ebp-20],而这里却变成了dword ptr [ebp-38]。这说明这两个不是一个变量,虽然变量名都叫L。如果是这样的话,那么来看看正确的代码是怎样的。
45: printf("%d\n", R->data);
00C35120 8B 45 D4 mov eax,dword ptr [ebp-2Ch]
00C35123 8B 08 mov ecx,dword ptr [eax]
00C35125 51 push ecx
00C35126 68 D0 7B C3 00 push 0C37BD0h
00C3512B E8 B7 C3 FF FF call 00C314E7
00C35130 83 C4 08 add esp,8
46: L = (LinkList*)malloc(sizeof(LinkList));
00C35133 8B F4 mov esi,esp
00C35135 6A 08 push 8
00C35137 FF 15 7C B1 C3 00 call dword ptr ds:[00C3B17Ch]
00C3513D 83 C4 04 add esp,4
00C35140 3B F4 cmp esi,esp
00C35142 E8 E9 C0 FF FF call 00C31230
00C35147 89 45 E0 mov dword ptr [ebp-20h],eax
结论正确
总结
- 在函数中每定义一个变量,esp的地址偏移量就会增加,增加的目的1是为了给更多的变量提供空间,2是为了栈对齐
- malloc函数在执行完毕后会将地址赋给eax然后返回
- 由于结构体中的变量可能会因为字节数而导致长度不一,因此结构体需要字节对齐,对齐标准以字节数最大的变量为主。汇编中如果需要结构体中的某个变量,则通过地址偏移的方式来获取,地址偏移量一般看变量的字节数