! Compares performance of matrix multiplication ! You can play by tweaking: ! (i) the order of TRIPLE DO index variables ! (ii) available compiler optimization options ! ! stali@purdue.edu program mm_test !use mkl95_blas implicit none real(8), allocatable :: mat_a(:,:), mat_b(:,:), mat_c(:,:) real(8) :: start, finish integer :: i, j, k, n print*,'Give N (between 500-2500)' read*,n allocate (mat_a(n,n),mat_b(n,n)) call random_number (mat_a) call random_number (mat_b) ! --- FORTRAN MATMUL INTRINSIC (most compilers should be able to auto-parallelize this) allocate (mat_c(n,n)) call cpu_time(start) mat_c=matmul(mat_a,mat_b) call cpu_time(finish) print*, 'MATMUL time',finish-start deallocate(mat_c) ! --- BLAS 3 DGEMM (parallelized (in MKL) if OMP_NUM_THREADS >= 2) allocate (mat_c(n,n)) call cpu_time(start) !call gemm (mat_a,mat_b,mat_c) ! simpler call instead of the one below; used with mkl95_blas module call dgemm ('N','N',n,n,n,1.0_8,mat_a,n,mat_b,n,0.0_8,mat_c,n) call cpu_time(finish) print*, 'DGEMM time',finish-start deallocate(mat_c) ! --- TRIPLE DO LOOP (most compilers should be able to auto-parallelize this) allocate (mat_c(n,n)) mat_c=0.0_8 call cpu_time(start) do i=1,n do j=1,n do k=1,n mat_c(i,j)=mat_c(i,j)+mat_a(i,k)*mat_b(k,j) end do end do end do call cpu_time(finish) print*, 'TRIPLE DO time',finish-start deallocate (mat_c) deallocate (mat_a,mat_b) end program mm_test