Source file
// mpi_AxB.c
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

const int rows = 10; // number of rows of the matrix
const int cols = 7;  // number of columns of the matrix

int main(int argc, char* argv[]) {
    int i, j, k, myid, numprocs, anstag;
    int A[rows][cols], B[cols], C[rows];
    int buf[cols], ans;
    double starttime, endtime;
    double totaltime;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    if (numprocs < 2) {
        printf("Error: too few processes! At least 2 are required.\n");
        MPI_Abort(MPI_COMM_WORLD, 99);
    }

    /* root process */
    if (myid == 0) {
        /* initialize A and B with random values and print them */
        starttime = MPI_Wtime();
        printf("\nA = \n");
        srand(getpid());
        for (i = 0; i < rows; i++) {
            printf("\t");
            for (j = 0; j < cols; j++) {
                A[i][j] = rand() % 10;
                printf("%d ", A[i][j]);
            }
            printf("\n");
        }
        printf("\nB = \n");
        for (i = 0; i < cols; i++) {
            B[i] = rand() % 10;
            printf("\t%d \n", B[i]);
        }
        printf("\n");

        // broadcast the B vector to all worker processes
        MPI_Bcast(B, cols, MPI_INT, 0, MPI_COMM_WORLD);

        // distribute the rows of A round-robin among the workers;
        // the message tag carries the row index
        for (i = 1; i < numprocs; i++) {
            for (k = i - 1; k < rows; k += numprocs - 1) {
                for (j = 0; j < cols; j++) {
                    buf[j] = A[k][j];
                }
                MPI_Send(buf, cols, MPI_INT, i, k, MPI_COMM_WORLD);
            }
        }
    }
    /* worker processes */
    else {
        MPI_Bcast(B, cols, MPI_INT, 0, MPI_COMM_WORLD);
        /* each worker receives its share of the rows of A and
           computes the dot product of each row with B */
        for (i = myid - 1; i < rows; i += numprocs - 1) {
            MPI_Recv(buf, cols, MPI_INT, 0, i, MPI_COMM_WORLD, &status);
            ans = 0;
            for (j = 0; j < cols; j++) {
                ans += buf[j] * B[j];
            }
            // send the result back, again tagged with the row index
            MPI_Send(&ans, 1, MPI_INT, 0, i, MPI_COMM_WORLD);
        }
    }

    /* root process collects the results, then prints C and the timing */
    if (myid == 0) {
        // receive one result per row; the tag identifies the row
        for (i = 0; i < rows; i++) {
            MPI_Recv(&ans, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
            anstag = status.MPI_TAG;
            C[anstag] = ans;
        }
        endtime = MPI_Wtime();
        totaltime = endtime - starttime; // starttime is only set on rank 0

        printf("\nC = \n");
        for (j = 0; j < rows; j++) {
            printf("\t%d \n", C[j]);
        }
        printf("\n");
        printf("total time: %f s.\n", totaltime);
    }
    MPI_Finalize();
    return 0;
}
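As a sanity check on the math, here is a minimal serial version of the same product C = A*B (a sketch, not part of the original post; the file name serial_check.c and the fixed seed are assumptions, and since the MPI program seeds with getpid(), its matrices differ between runs):
// serial_check.c -- hypothetical serial reference, not from the original program
#include <stdio.h>
#include <stdlib.h>

#define ROWS 10
#define COLS 7

int main(void) {
    int A[ROWS][COLS], B[COLS], C[ROWS];
    int i, j;
    srand(42); /* fixed seed so the output is reproducible */
    for (i = 0; i < ROWS; i++)
        for (j = 0; j < COLS; j++)
            A[i][j] = rand() % 10;
    for (j = 0; j < COLS; j++)
        B[j] = rand() % 10;
    /* C[i] = sum over j of A[i][j] * B[j], the same dot product each worker computes */
    for (i = 0; i < ROWS; i++) {
        C[i] = 0;
        for (j = 0; j < COLS; j++)
            C[i] += A[i][j] * B[j];
        printf("%d\n", C[i]);
    }
    return 0;
}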
Compile, link, and run
Compiling and linking with mpicc
/* compile and link in one step */
mpicc mpi_AxB.c -o mpi_AxB //produces the executable directly
/* or compile and link separately */
mpicc -c mpi_AxB.c //produces the .o file
mpicc -o mpi_AxB mpi_AxB.o //then produces the executable
/* run */
mpirun ./mpi_AxB
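Since the program aborts with fewer than two processes, it is safer to pass an explicit process count (4 here is just an example):
/* run with an explicit process count; at least 2 are required */
mpirun -np 4 ./mpi_AxB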
You can also compile with g++ first (passing the MPI include path at compile time) and then link with mpicc:
MPI_INCLUDE = -I/work/soft/mpi/intelmpi/2017.4.239/intel64/include
g++ $(MPI_INCLUDE) -c mpi_AxB.c
mpicc -o mpi_AxB mpi_AxB.o
mpirun ./mpi_AxB
Linking with g++ as below fails, because only -lstdc++ is passed and the MPI libraries are never linked, so the MPI symbols stay unresolved:
MPI_INCLUDE = -I/work/soft/mpi/intelmpi/2017.4.239/intel64/include
MPILIB = -L/work/soft/mpi/intelmpi/2017.4.239/intel64/bin/lib -lstdc++
g++ $(MPI_INCLUDE) -c mpi_AxB.c
g++ $(MPILIB) -o mpi_AxB mpi_AxB.o
mpirun ./mpi_AxB
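A sketch of a g++ link line that could work instead, assuming Intel MPI ships libmpi.so under intel64/lib (the library directory and name are assumptions; check your installation, and note the original -L path pointed at bin/lib):
MPI_INCLUDE = -I/work/soft/mpi/intelmpi/2017.4.239/intel64/include
MPILIB = -L/work/soft/mpi/intelmpi/2017.4.239/intel64/lib -lmpi
g++ $(MPI_INCLUDE) -c mpi_AxB.c
g++ -o mpi_AxB mpi_AxB.o $(MPILIB)
mpirun ./mpi_AxB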
Sample output:
A =
6 9 9 3 9 7 3
6 9 3 1 9 1 3
0 0 7 0 1 7 6
4 0 4 5 0 2 6
5 0 9 3 1 0 6
0 7 9 6 8 5 0
7 6 5 8 8 3 8
9 0 4 3 0 0 1
2 2 7 9 4 8 3
8 8 9 8 7 1 7
B =
5
8
7
3
4
2
3
C =
233
173
85
85
119
179
204
85
143
242
Makefile:
MPI_INCLUDE = /work/soft/mpi/intelmpi/2017.4.239/intel64/include
MPILIB = -L/work/soft/mpi/intelmpi/2017.4.239/intel64/bin/lib -lstdc++
MPIFLAGS = -I$(MPI_INCLUDE)
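The fragment above only defines variables. A minimal set of rules completing it might look like this (a sketch; the target layout is an assumption, not from the original, and recipe lines must be indented with tabs):
mpi_AxB: mpi_AxB.o
	mpicc -o mpi_AxB mpi_AxB.o    # link with mpicc so the MPI libraries are pulled in
mpi_AxB.o: mpi_AxB.c
	mpicc $(MPIFLAGS) -c mpi_AxB.c    # compile with the MPI include path
clean:
	rm -f mpi_AxB mpi_AxB.o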
Sometimes mpirun needs extra arguments. For example, with the following error:
ibv_exp_query_device: invalid comp_mask !!! (comp_mask = 0x7f81fdd30040 valid_mask = 0x1)
[r4-node9][[44333,1],23][btl_openib_component.c:1670:init_one_device] error obtaining device attributes for mlx5_0 errno says Invalid argument
--------------------------------------------------------------------------
WARNING: There was an error initializing an OpenFabrics device.
Local host: r4-node9
Local device: mlx5_0
--------------------------------------------------------------------------
If the error above appears, add the parameter -mca pml ucx to mpirun, which tells Open MPI to use the UCX point-to-point layer instead of the failing openib component.
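For example (the process count is arbitrary):
mpirun -mca pml ucx -np 4 ./mpi_AxB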