多媒体指令(AVX加速数组求和)

#include <stdio.h>
#ifdef _MSC_VER
#include <intrin.h>
#endif
#include <immintrin.h>

#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>

using namespace std;
// NOTE(review): every numeric literal of the original listing was lost when
// the article was extracted (e.g. `for (int k = ; k < ; k++)`). The two sizes
// below are reconstructed placeholders, NOT the author's values; adjust them
// to reproduce the timings quoted in the surrounding text.
static const int kElementCount = 10000;  // number of doubles in each array
static const int kRepeatCount = 10000;   // how many times each benchmark redoes the full pass

// Scalar baseline: computes re[i] = a[i] + b[i] for the first kElementCount
// elements, repeats the whole pass kRepeatCount times, and prints the elapsed
// clock() ticks to stdout.
//
// a, b : input arrays, each holding at least kElementCount doubles.
// re   : output array with room for at least kElementCount doubles.
void test1(double *a, double *b, double *re)
{
    const std::clock_t start = std::clock();
    for (int k = 0; k < kRepeatCount; k++)
    {
        for (int i = 0; i < kElementCount; i++)
        {
            re[i] = a[i] + b[i];
        }
    }
    const std::clock_t stop = std::clock();
    std::cout << stop - start << std::endl;
}

// AVX version of test1: same inputs, same results in `re`, same timing output,
// but four doubles are added per instruction.
//
// Unaligned loads/stores are used so the arrays need no special alignment, and
// a scalar tail loop covers the case where kElementCount is not a multiple
// of 4. The CPU must support AVX at run time.
//
// GCC/Clang only accept AVX intrinsics inside code compiled for an AVX target;
// the attribute enables that for this one function without requiring -mavx
// for the whole file. MSVC needs no such marker.
#if defined(__GNUC__) || defined(__clang__)
__attribute__((target("avx")))
#endif
void test2(double *a, double *b, double *re)
{
    const int kLanes = 4;  // doubles held by one 256-bit register

    const std::clock_t start = std::clock();
    for (int k = 0; k < kRepeatCount; k++)
    {
        int i = 0;
        for (; i + kLanes <= kElementCount; i += kLanes)
        {
            const __m256d lhs = _mm256_loadu_pd(a + i);
            const __m256d rhs = _mm256_loadu_pd(b + i);
            _mm256_storeu_pd(re + i, _mm256_add_pd(lhs, rhs));
        }
        // Remaining 0..3 elements that do not fill a whole register.
        for (; i < kElementCount; i++)
        {
            re[i] = a[i] + b[i];
        }
    }
    const std::clock_t stop = std::clock();
    std::cout << stop - start << std::endl;
}

// Fills two arrays with a[i] = b[i] = i, runs the scalar and the AVX benchmark
// on them (each prints its elapsed clock ticks), then waits for a key press.
int main(int argc, char* argv[])
{
    std::vector<double> a(kElementCount);
    std::vector<double> b(kElementCount);
    std::vector<double> re(kElementCount);
    for (int i = 0; i < kElementCount; i++)
    {
        a[i] = i;
        b[i] = i;
    }

    test1(a.data(), b.data(), re.data());
    test2(a.data(), b.data(), re.data());

    // Windows console idiom that keeps the window open; on other platforms the
    // shell merely reports that `pause` is not a known command.
    system("pause");
    return 0;
}

实测下来,AVX 版本(test2)比普通循环版本(test1)大概能快 100 毫秒左右。

上一篇:Day 2 @ RSA Conference Asia Pacific & Japan 2016


下一篇:delphi 小数点四舍五入问题