0%

MPI学习笔记

利用MPI可以加速排序算法。
调用c++标准库的sort对1e7的数据进行排序,大约需要2.2秒的时间。

15826979841.jpg

使用MPI将程序并行化,可以大大加快速度。

方法一

将主进程待排序的数组分为两部分,分别发送到两个子进程排序,排完之后再送回主进程,由主进程将两段有序数据归并起来(共需要3个进程)。

代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#include<iostream>
#include<mpi.h>
#include<algorithm>
using namespace std;
const int MAX_size=1e7;
int main(int argc,char** argv){
int numprocs, myid, source;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
int siz=MAX_size/2;
if(myid==0){
int *nums=new int[MAX_size+3];
for(int i=0;i<MAX_size;i++){
nums[i]=rand();
}
int t1=clock();
MPI_Send(nums,siz,MPI_INT,1,1,MPI_COMM_WORLD);
MPI_Send(nums+siz,siz,MPI_INT,2,2,MPI_COMM_WORLD);
int* rec1=new int[siz+3],*rec2=new int[siz+3];
MPI_Recv(rec1,siz,MPI_INT,1,1,MPI_COMM_WORLD,&status);
MPI_Recv(rec2,siz,MPI_INT,2,2,MPI_COMM_WORLD,&status);
int i=0,j=0,loc=0;
while(i<siz&&j<siz){
if(rec1[i]<rec2[j])nums[loc++]=rec1[i++];
else nums[loc++]=rec2[j++];
}
while(i<siz)nums[loc++]=rec1[i++];
while(j<siz)nums[loc++]=rec2[j++];
int t2=clock();
cout<<t2-t1<<endl;
}
else{
int* rec=new int[siz+3];
MPI_Recv(rec,siz,MPI_INT,0,myid,MPI_COMM_WORLD,&status);
sort(rec,rec+siz);
MPI_Send(rec,siz,MPI_INT,0,myid,MPI_COMM_WORLD);
}
MPI_Finalize();
}

大概需要1.2秒的时间:
1582699014283.png

方法二

将数组分为4部分,在4个子进程里排序,再两两归并,最后送回主进程做最终归并。各进程按二叉树组织(进程i的两个子进程为2i+1和2i+2),总共需要7个进程。
代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#include<iostream>
#include <mpi.h>
#include<algorithm>
using namespace std;
const int MAX_size=1e7;
// Classifies a rank by the branch main() should run for it:
//   1  -> rank 0
//   0  -> any other rank below 3
//  -1  -> rank 3 and above
int get_state(int myid){
    if (myid >= 3) {
        return -1;
    }
    return (myid == 0) ? 1 : 0;
}
int main(int argc, char* argv[])
{
int numprocs, myid, source;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
int state=get_state(myid);
if(state==1){
int* nums=new int[MAX_size+3];
int son1=(myid<<1)+1,son2=(myid<<1)+2;
for(int i=0;i<MAX_size;i++)nums[i]=rand();
int t1=clock();
MPI_Send(nums,MAX_size/2,MPI_INT,son1,myid*numprocs+son1,MPI_COMM_WORLD);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
MPI_Send(nums+MAX_size/2,MAX_size/2,MPI_INT,son2,myid*numprocs+son2,MPI_COMM_WORLD);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
int* rec1=new int[MAX_size/2+3],*rec2=new int[MAX_size/2+3];
MPI_Recv(rec1,MAX_size/2,MPI_INT,son1,myid*numprocs+son1,MPI_COMM_WORLD,&status);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
MPI_Recv(rec2,MAX_size/2,MPI_INT,son2,myid*numprocs+son2,MPI_COMM_WORLD,&status);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
int i=0,j=0,size=MAX_size/2,loc=0;
while(i<size&&j<size){
if(rec1[i]<rec2[j]){
nums[loc++]=rec1[i++];
}
else{
nums[loc++]=rec2[j++];
}
}
while(i<size){nums[loc++]=rec1[i++];}
while(j<size){nums[loc++]=rec2[j++];}
// for(int i=0;i<MAX_size;i++)cout<<nums[i]<<endl;
int t2=clock();
cout<<t2-t1<<endl;
}

else if(state==0){
int* nums=new int[MAX_size/2+3];
int* rec1=new int[MAX_size/4+3],*rec2=new int[MAX_size/4+3];
int son1=(myid<<1)+1,son2=(myid<<1)+2,fa=(myid-1)>>1;
MPI_Recv(nums,MAX_size/2,MPI_INT,fa,fa*numprocs+myid,MPI_COMM_WORLD,&status);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
MPI_Send(nums,MAX_size/4,MPI_INT,son1,myid*numprocs+son1,MPI_COMM_WORLD);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
MPI_Send(nums+MAX_size/4,MAX_size/4,MPI_INT,son2,myid*numprocs+son2,MPI_COMM_WORLD);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
MPI_Recv(rec1,MAX_size/4,MPI_INT,son1,myid*numprocs+son1,MPI_COMM_WORLD,&status);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
MPI_Recv(rec2,MAX_size/4,MPI_INT,son2,myid*numprocs+son2,MPI_COMM_WORLD,&status);
//cout<<myid<<" "<<son1<<" "<<son2<<endl;
int i=0,j=0,size=MAX_size/4,loc=0;
while(i<size&&j<size){
if(rec1[i]<rec2[j]){
nums[loc++]=rec1[i++];
}
else{
nums[loc++]=rec2[j++];
}
}
while(i<size){nums[loc++]=rec1[i++];}
while(j<size){nums[loc++]=rec2[j++];}
MPI_Send(nums,MAX_size/2,MPI_INT,fa,fa*numprocs+myid,MPI_COMM_WORLD);
}

else{
int* nums=new int[MAX_size/4+3];
int fa=(myid-1)>>1;
MPI_Recv(nums,MAX_size/4,MPI_INT,fa,fa*numprocs+myid,MPI_COMM_WORLD,&status);
// cout<<myid<<endl;
sort(nums,nums+MAX_size/4);
MPI_Send(nums,MAX_size/4,MPI_INT,fa,fa*numprocs+myid,MPI_COMM_WORLD);
}
// cout<<"??\n";
MPI_Finalize();
// cout<<myid<<"end\n";
} /* end main */

这种方法大约需要0.87秒:
1582699491051.png

方法三

将数组尽量平均地分到多个子进程里分别排序,再送回主进程,由主进程直接对各个有序段做多路归并:
代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#include<iostream>
#include<algorithm>
#include<mpi.h>
const int MAX_size=1e7;
using namespace std;
int main(int argc,char** argv){
int numprocs, myid, source;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
int sub_size=MAX_size/(numprocs-1);
int siz[104]={0};
for(int i=1;i<numprocs;i++)siz[i]=MAX_size/(numprocs-1);
for(int i=1;i<=MAX_size%(numprocs-1);i++)siz[i]++;
if(!myid){
int* num=new int[MAX_size];
for(int i=0;i<MAX_size;i++)num[i]=rand();
int t1=clock();
int loc[numprocs]={0};
for(int i=1,*tem=num;i<numprocs;i++,tem+=siz[i]){
// cout<<i<<endl;
MPI_Send(tem,siz[i],MPI_INT,i,i,MPI_COMM_WORLD);
}
int** ans=new int*[numprocs];
// cout<<"??\n";
for(int i=1;i<numprocs;i++){
ans[i]=new int[siz[i]+3];
MPI_Recv(ans[i],siz[i],MPI_INT,i,i,MPI_COMM_WORLD,&status);
}
for(int i=0;i<MAX_size;i++){
int minval=(1ll<<31)-1,locc=0;
for(int i=1;i<numprocs;i++){
if(loc[i]<siz[i]&&ans[i][loc[i]]<minval)minval=ans[i][loc[i]],locc=i;
}
num[i]=minval,loc[locc]++;
}
int t2=clock();
cout<<t2-t1<<endl;
}
else{
int* num=new int[siz[myid]+3];
MPI_Recv(num,siz[myid],MPI_INT,0,myid,MPI_COMM_WORLD,&status);
sort(num,num+siz[myid]);
MPI_Send(num,siz[myid],MPI_INT,0,myid,MPI_COMM_WORLD);
}
MPI_Finalize();

}

对进程数量分别为4、5、6、7、8的情况各做了5次实验,结果如下(单位为μs):

进程数 平均用时 最短用时 最长用时
4 996875 984375 1015625
5 868750 843750 890625
6 865625 843750 890625
7 890625 875000 906250
8 906250 859375 953125

1582702015001.png