MPI/C – Matrix multiplication [order N]
November 30th, 2010 @ 04:11:19
MPI
For my understanding of what MPI is &/or does, please refer to this post.
Program Listing
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 | /* matrix_multiply.c * PARALLEL [MPI] C PROGRAM TO PERFORM MATRIX MULTIPLICATION. * DISPLAY OF MATRIX ELEMENTS IS SET FOR 'INTEGER-LIKE'. IF [SOME/ALL OF] * THE MATRIX ELEMENTS ARE NON-INTEGERS, MODIFY THE DISPLAY IN PRINTF * STATEMENTS APPROPRIATELY. * * TESTED SUCCESSFULLY WITH MPICH2 (1.3.1) COMPILED AGAINST GCC (4.1.2) * IN A LINUX BOX WITH QUAD CORE INTEL XEON PROCESSOR (1.86 GHz) & 4GB OF RAM. * * STAGE #01: * SQUARE MATRICES ONLY * MATRIX ELEMENTS GENERATED WITHIN THE PROGRAM * * STAGE #02: (PROPOSED) * SQUARE MATRICES ONLY * MATRIX ELEMENTS READ FROM EXTERNAL DATA FILES - USEFUL * WHEN A THIRD PARTY PROGRAM GENERATES DATA. ALSO REMOVES * THE NEED FOR RECOMPILING THE PROGRAM EVERY TIME ORDER OF * MATRICES CHANGE. WRITE THE PRODUCT MATRIX TO A FILE. * * STAGE #03: (PROPOSED) * NOT NECESSARILY SQUARE MATRICES, i.e. * A(I x J) * B (J x K) = C(I x K) * READ MATRICES FROM EXTERNAL DATA FILES * CHECK TO MAKE SURE 'COLS(A) = ROWS(B)' BEFORE * ATTEMPTING TO PERFORM THE ACTUAL MULTIPLICATION. * WRITE THE PRODUCT MATRIX TO A FILE. * * FIRST WRITTEN: GOWTHAM; Fri, 10 Sep 2010 23:33:28 -0400 * LAST MODIFIED: GOWTHAM; Sat, 20 Nov 2010 02:19:38 -0500 * * URL: * http://sgowtham.net/blog/2010/11/30/mpi-c-matrix-multiplication-order-n/ * * COMPILATION: * mpicc -g -Wall -lm matrix_multiply.c -o matrix_multiply.x * * EXECUTION: * mpirun -machinefile MACHINEFILE -np NPROC ./matrix_multiply.x * * NPROC : NUMBER OF PROCESSORS ALLOCATED TO RUNNING THIS PROGRAM; * MUST BE LESS THAN OR EQUAL TO THE ORDER OF THE * MATRICES INVOLVED. * MACHINEFILE : FILE LISTING THE HOSTNAMES OF PROCESSORS ALLOCATED TO * RUNNING THIS PROGRAM * */ /* STANDARD HEADERS AND DEFINITIONS * REFERENCE: http://en.wikipedia.org/wiki/C_standard_library */ #include <stdio.h> /* Core input/output operations */ #include <stdlib.h> /* Conversions, random numbers, memory allocation, etc. */ #include <math.h> /* Common mathematical functions */ #include <time.h> /* Converting between various date/time formats */ #include <mpi.h> /* MPI functionality */ #define MASTER 0 /* Process ID for MASTER */ #define N 128 /* Order of the matrices */ /* FUNCTION TO MAP THE PROCESSOR ID * 'dest' OPTION IN MPI_Send MUST BE AN INTEGER BETWEEN 0 AND 'n_proc - 1' */ int map_processors(int i, int n_procs) { n_procs = n_procs - 1; int r = (int) ceil( (double) N / (double) n_procs); int processor = i / r; /* RETURN THE VALUE 'processor + 1' */ return processor + 1; } /* MAIN PROGRAM BEGINS */ int main(int argc, char **argv) { /* VARIABLE DECLARATION */ int n_procs, /* Number of processors */ proc_id, /* Process identifier */ proc_source, /* Mapped processor ID of the source */ proc_dest, /* Mapped processor ID of the destination */ proc_current, /* Mapped processor ID */ i, j, k; /* Dummy / running indices */ double A[N][N], /* Matrix A - N x N */ B[N][N], /* Matrix B - N x N */ C[N][N], /* Matrix C - N x N */ sum, /* Cumulated sum of matrix product */ Abuf[N], /* 1D array to hold the value of relevant portion of A received from MASTER */ Cbuf[N]; /* 1D array to hold the value of 'sum' so that it can be sent to MASTER */ MPI_Status status; /* MPI structure containing return codes for message passing operations */ /* INITIALIZE MPI */ MPI_Init(&argc, &argv); /* GET THE PROCESSOR ID AND NUMBER OF PROCESSORS */ MPI_Comm_rank(MPI_COMM_WORLD, &proc_id); MPI_Comm_size(MPI_COMM_WORLD, &n_procs); /* IF MASTER, THEN DO THE FOLLOWING: * POPULATE THE MATRICES A & B * SEND FULL MATRIX B TO ALL WORKERS * SEND RELEVANT PORTIONS OF MATRIX A TO ALL WORKERS * RECEIVE MATRIX C FROM WORKERS * DISPLAY MATRIX C */ if (proc_id == MASTER) { /* POPULATE MATRIX A */ printf("\n Matrix A:\n\n"); for (i = 0; i < N; i++) { for (j = 0; j < N; j++) { /* EACH MATRIX ELEMENT IS THE SUM OF INDICES */ A[i][j] = i + j; printf(" %4.0lf", A[i][j]); } printf("\n"); } /* POPULATE MATRIX B */ printf("\n Matrix B:\n\n"); for (i = 0; i < N; i++) { for (j = 0; j < N; j++) { /* EACH MATRIX ELEMENT IS THE PRODUCT OF INDICES */ B[i][j] = i * j; printf(" %4.0lf", B[i][j]); } printf("\n"); } /* CONVERT MATRIX B INTO 1D BUFFERS; * SEND ALL THOSE 1D BUFFERS TO ALL WORKERS */ for (i = 1; i < n_procs; i++) { for (j = 0; j < N; j++) { /* MPI_Send(buf, count, datatype, dest, tag, comm) */ MPI_Send(&B[j], N, MPI_DOUBLE, i, j + 1000, MPI_COMM_WORLD); } } /* SEND RELEVANT PORTIONS OF A [SPLIT ROW-WISE] TO ALL WORKERS */ for (i = 0; i < N; i++) { /* ID OF THE PROCESSOR THAT'S RECEIVING THE INFORMATION (dest) */ proc_dest = map_processors(i, n_procs); /* MPI_Send(buf, count, datatype, dest, tag, comm) */ MPI_Send(&A[i], N, MPI_DOUBLE, proc_dest, i + 10000, MPI_COMM_WORLD); } /* RECEIVE RESULT FROM WORKERS */ for (i = 0; i < N; i++) { /* ID OF THE PROCESSOR THAT'S SENDING THE INFORMATION (source) */ proc_source = map_processors(i, n_procs); /* MPI_Recv(buf, count, datatype, source, tag, comm, status) */ MPI_Recv(&C[i], N, MPI_DOUBLE, proc_source, i, MPI_COMM_WORLD, &status); } /* PRINT MATRIX C */ printf("\n Matrix C:\n\n"); for (i = 0; i < N; i++) { for (j = 0; j < N; j++) { printf(" %4.0lf", C[i][j]); } printf("\n"); } printf("\n"); } /* MASTER LOOP ENDS */ /* IF WORKER, THEN DO THE FOLLOWING: * RECEIVE ENTIRE MATRIX B FROM MASTER * RECEIVE RELEVANT PORTION OF MATRIX A FROM MASTER * PERFORM RELEVANT PORTION OF A * B CALCULATION * SEND RELEVANT PORTION OF MATRIX C TO MASTER */ if (proc_id > MASTER) { /* RECEIVE THE ENTIRE MATRIX B FROM MASTER AS A SET OF 1D BUFFERS */ /* MPI_Recv(buf, count, datatype, source, tag, comm, status) */ for (i = 0; i < N; i++) { MPI_Recv(&B[i], N, MPI_DOUBLE, 0, i + 1000, MPI_COMM_WORLD, &status); } /* FOR LOOP - MATRIX MULTIPLICATION - BEGINS */ for (i = 0; i < N; i++) { /* ID OF THE PROCESSOR THAT'S SENDING THE INFORMATION (source) */ proc_current = map_processors(i, n_procs); /* IF LOOP - CHECK CURRENT PROCESSOR ID - BEGINS */ if (proc_current == proc_id) { /* RECEIVE RELEVANT PORTIONS OF A [SPLIT ROW-WISE] FROM MASTER */ MPI_Recv(&Abuf, N, MPI_DOUBLE, 0, i + 10000, MPI_COMM_WORLD, &status); for (j = 0; j < N; j++) { /* sum NEEDS TO BE SET TO ZERO FOR EVERY VALUE OF j */ sum = 0; /* CALCULATE THE sum, CUMULATIVE SUM OF PRODUCT OF * Abuf[i] & B[j][k] */ for (k = 0; k < N; k++) { sum = sum + (Abuf[k] * B[k][j]); } /* ASSIGN THE VALUE OF 'sum' to Cbuf[j] SO THAT MPI_Send CAN * SEND IT TO MASTER */ Cbuf[j] = sum; } /* SEND Cbuf[j] TO MASTER */ MPI_Send(&Cbuf, N, MPI_DOUBLE, 0, i, MPI_COMM_WORLD); } /* IF LOOP - CHECK CURRENT PROCESSOR ID - ENDS */ } /* FOR LOOP - MATRIX MULTIPLICATION - ENDS */ } /* WORKER LOOP ENDS */ /* FINALIZE MPI */ MPI_Finalize(); /* INDICATE THE TERMINATION OF THE PROGRAM */ return 0; } /* MAIN PROGRAM ENDS */ |
Program Compilation & Execution
The machine where I am running this calculation, dirac, has 4 processors and has MPICH2 v1.3.1 compiled against GCC v4.1.2 compilers.
[guest@dirac mpi_samples]$ which mpicc alias mpicc='mpicc -g -Wall -lm' ~/mpich2/1.3.1/gcc/4.1.2/bin/mpicc [guest@dirac mpi_samples]$ which mpirun alias mpirun='mpirun -machinefile $HOME/machinefile' ~/mpich2/1.3.1/gcc/4.1.2/bin/mpirun [guest@dirac mpi_samples]$ mpicc matrix_multiply.c -o matrix_multiply.x [guest@dirac mpi_samples]$ mpirun -np 2 ./matrix_multiply.x Matrix A: 0 1 1 2 Matrix B: 0 0 0 1 Matrix C: 0 1 0 2 [guest@dirac mpi_samples]$ mpirun -np 2 ./matrix_multiply.x Matrix A: 0 1 2 3 1 2 3 4 2 3 4 5 3 4 5 6 Matrix B: 0 0 0 0 0 1 2 3 0 2 4 6 0 3 6 9 Matrix C: 0 14 28 42 0 20 40 60 0 26 52 78 0 32 64 96 [guest@dirac mpi_samples]$ mpirun -np 2 ./matrix_multiply.x Matrix A: 0 1 2 3 4 5 6 7 1 2 3 4 5 6 7 8 2 3 4 5 6 7 8 9 3 4 5 6 7 8 9 10 4 5 6 7 8 9 10 11 5 6 7 8 9 10 11 12 6 7 8 9 10 11 12 13 7 8 9 10 11 12 13 14 Matrix B: 0 0 0 0 0 0 0 0 0 1 2 3 4 5 6 7 0 2 4 6 8 10 12 14 0 3 6 9 12 15 18 21 0 4 8 12 16 20 24 28 0 5 10 15 20 25 30 35 0 6 12 18 24 30 36 42 0 7 14 21 28 35 42 49 Matrix C: 0 140 280 420 560 700 840 980 0 168 336 504 672 840 1008 1176 0 196 392 588 784 980 1176 1372 0 224 448 672 896 1120 1344 1568 0 252 504 756 1008 1260 1512 1764 0 280 560 840 1120 1400 1680 1960 0 308 616 924 1232 1540 1848 2156 0 336 672 1008 1344 1680 2016 2352 [guest@dirac mpi_samples]$ |