我首先创建一个文件如下:
// One fixed-size record of the test file: eight consecutive 32-bit unsigned fields.
struct event_data {
    uint32_t data_1;
    uint32_t data_2;
    uint32_t data_3;
    uint32_t data_4;
    uint32_t data_5;
    uint32_t data_6;
    uint32_t data_7;
    uint32_t data_8;
};
FILE* fp = fopen("C:\\test.bin", "w+b");
for(int i=0; i<100000; i++)
{
event_data data;
fwrite(&data, sizeof(event_data), 1, fp);
}
fclose(fp);
之后我尝试使用boost的内存映射文件从文件中读取数据:
// Benchmark: make 10000 passes over the memory-mapped test file and report the
// time between the two clock() calls.
clock_t start = clock();
// Destinations for the two memcpy variants below: an automatic object and an
// object allocated with new. Nothing in this snippet reads either one after
// the copy.
event_data stack_buffer;
// NOTE(review): heap_buffer is not deleted anywhere in this snippet.
event_data* heap_buffer = new event_data();
for(int j=0; j<10000; j++)
{
// The mapping object is constructed inside the outer loop, so the file is
// mapped read-only again on every pass.
boost::iostreams::mapped_file mmap("C:\\test.bin", boost::iostreams::mapped_file::readonly);
const char* data = mmap.const_data();
for(int i=0; i<100000; i++)
{
// Pointer to the i-th record inside the mapped bytes.
const event_data* evt = reinterpret_cast<const event_data*>(data) + i;
// Use memcpy to copy data to buffer, either (1) or (2)
// memcpy(&stack_buffer, evt, sizeof(event_data)); <== (1)
// memcpy(heap_buffer, evt, sizeof(event_data)); <== (2)
}
}
clock_t end = clock();
// Convert the clock() tick difference to seconds.
printf("%f sec\n", (double) (end - start) / CLOCKS_PER_SEC);
如果我使用 (1),它打印出约 0.56 秒;如果我使用 (2),则打印出约 26.6 秒。为什么两者的结果相差如此之大?
解决方法:
我写了两个函数:
// Two 100-element int arrays, declared here and defined elsewhere. Both test
// functions read moo1; only foo1 writes to moo2.
extern int moo1 [100];
extern int moo2 [100];
// Copies the whole of moo1 into the extern array moo2, then stores
// moo2[77] + moo2[14] into moo2[2].
void foo1 ()
{
    memcpy(moo2, moo1, sizeof moo1);

    // Both operands are read from the freshly copied destination array.
    const int sum = moo2[77] + moo2[14];
    moo2[2] = sum;
}
void foo2 ()
{
int moo3[100];
memcpy(moo3, moo1, sizeof(moo1));
moo3[2] = moo3[77] + moo3[14];
}
下面是 gcc 为这两个函数生成的汇编代码:
foo1:
# foo1(): the 400-byte memcpy(moo2, moo1, ...) expanded inline, followed by
# moo2[2] = moo2[77] + moo2[14].
_Z4foo1v:
.LFB5:
# Save %rdi and %rsi; rep movsq below uses them as destination and source.
pushq %rdi
.seh_pushreg %rdi
pushq %rsi
.seh_pushreg %rsi
.seh_endprologue
# %rsi = address of moo1, %rax = address of moo2.
movq .refptr.moo1(%rip), %rsi
movq .refptr.moo2(%rip), %rax
# Load the first 8 bytes of moo1.
movq (%rsi), %rdx
# %rdi = (moo2 + 8) rounded down to a multiple of 8, i.e. the first
# 8-byte-aligned address above the start of moo2.
leaq 8(%rax), %rdi
movq %rax, %rcx
andq $-8, %rdi
# Store the first 8 bytes into moo2.
movq %rdx, (%rax)
# Load the last 8 bytes of moo1 (byte offsets 392..399).
movq 392(%rsi), %rdx
# %rcx = moo2 - aligned destination; move the source pointer by the same
# distance so source and destination stay in step.
subq %rdi, %rcx
subq %rcx, %rsi
# Quadword count = (400 + %rcx) / 8: the bytes from the aligned destination to
# the end of moo2, in 8-byte units (any remainder is covered by the separate
# store of the last 8 bytes).
addl $400, %ecx
shrl $3, %ecx
# Store the last 8 bytes into moo2.
movq %rdx, 392(%rax)
# Bulk copy: %rcx quadwords from (%rsi) to (%rdi).
rep movsq
# %edx = moo2[14] + moo2[77] (byte offsets 56 and 308 with 4-byte ints).
movl 56(%rax), %edx
addl 308(%rax), %edx
# moo2[2] = %edx (byte offset 8).
movl %edx, 8(%rax)
popq %rsi
popq %rdi
ret
foo2:
# foo2(): the entire body is a single ret; no copy and no addition were emitted.
_Z4foo2v:
.LFB6:
.seh_endprologue
ret
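请自行得出结论:foo2 中的局部数组在函数返回后不会再被任何代码读取,所以编译器把 memcpy 连同后面的加法整个优化掉了;而 foo1 写入的是外部可见的 moo2,拷贝必须保留。问题中拷贝到 stack_buffer 的写法 (1) 很可能属于与 foo2 相同的情形。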