Lesson 13

Date: 4/19/2017
High Performance Computing (part I: OpenMP)
Linux System Administration


2D Matrix multiply example

Exercise
  • Two 2D matrices are multiplied. Only the outer loop iterations are parallelized in the code below. Threads share the outer loop iterations according to a predefined chunk size.
    C / C++ - Matrix Multiply Example
    #include <omp.h>
    #include <stdio.h>
    #include <stdlib.h>
    
    /* Matrix dimensions: A is NRA x NCA, B is NCA x NCB, result C is NRA x NCB. */
    #define NRA 62                 /* number of rows in matrix A (and in result matrix C) */
    #define NCA 15                 /* number of columns in matrix A (and rows in matrix B) */
    #define NCB 7                  /* number of columns in matrix B (and in result matrix C) */
    
    /*
     * Multiply two fixed-size matrices (C = A * B) inside a single OpenMP
     * parallel region.  Only the outermost loop of each loop nest is divided
     * among the threads, in static chunks of `chunk` iterations.  Each thread
     * reports which rows it handled, and the result matrix is printed serially
     * once the parallel region has ended.
     *
     * argc and argv are accepted but not used.
     */
    int main (int argc, char *argv[])
    {
      int thread_id, thread_count, row, col, inner, chunk;
      double a[NRA][NCA];          /* left operand A */
      double b[NCA][NCB];          /* right operand B */
      double c[NRA][NCB];          /* product C */

      chunk = 10;                  /* iterations per chunk for schedule(static, chunk) */

      /* Open the parallel region; the sharing of every variable is spelled out. */
    #pragma omp parallel shared(a, b, c, thread_count, chunk) private(thread_id, row, col, inner)
      {
        thread_id = omp_get_thread_num();

        /* Only thread 0 queries the team size and prints the banner. */
        if (thread_id == 0) {
          thread_count = omp_get_num_threads();
          printf("Starting matrix multiple example with %d threads\n", thread_count);
          printf("Initializing matrices...\n");
        }

        /* Fill A: each element is the sum of its row and column index. */
    #pragma omp for schedule(static, chunk)
        for (row = 0; row < NRA; row++) {
          for (col = 0; col < NCA; col++) {
            a[row][col] = row + col;
          }
        }

        /* Fill B: each element is the product of its row and column index. */
    #pragma omp for schedule(static, chunk)
        for (row = 0; row < NCA; row++) {
          for (col = 0; col < NCB; col++) {
            b[row][col] = row * col;
          }
        }

        /* Clear C so the multiply below can accumulate into it. */
    #pragma omp for schedule(static, chunk)
        for (row = 0; row < NRA; row++) {
          for (col = 0; col < NCB; col++) {
            c[row][col] = 0;
          }
        }

        /* Every thread announces itself before taking its share of the rows. */
        printf("Thread %d starting matrix multiply...\n", thread_id);

        /* Rows of C are split among the threads; each row is logged for demonstration. */
    #pragma omp for schedule(static, chunk)
        for (row = 0; row < NRA; row++) {
          printf("Thread=%d did row=%d\n", thread_id, row);
          for (col = 0; col < NCB; col++) {
            for (inner = 0; inner < NCA; inner++) {
              c[row][col] += a[row][inner] * b[inner][col];
            }
          }
        }
      }   /* end of parallel region */

      /* Serial section: print the result matrix one row per line. */
      printf("******************************************************\n");
      printf("Result Matrix:\n");
      for (row = 0; row < NRA; row++) {
        for (col = 0; col < NCB; col++) {
          printf("%6.2f   ", c[row][col]);
        }
        printf("\n");
      }
      printf("******************************************************\n");
      printf ("Done.\n");

      return 0;
    }
    
    
    
    
    Copy the code above into a file named mm.c, then compile and run it as follows:
    gcc -fopenmp -o mm.x mm.c
    export OMP_NUM_THREADS=4
    ./mm.x
    




  • Take me to the Course Website