OpenMP
Um método de paralelização para as linguagens
Fortran, C, C++ para shared memory systems.
✓
Directivas para o compilador
✓
Threads
✓
Biblioteca de rotinas
✓
Variáveis de ambiente
Thread: entidade que pode executar de forma independente um conjunto de instruções
B. Chapman, G. Jost, R. van der Pas
Using OpenMP, MIT Press 2008
Multiplicação Matriz-Vector
$$\sum_{k=1}^{m} M_{ik}\,V_k = A_i, \qquad i = 1, \dots, n$$
Paralelização segundo i
! Driver program: reads the dimensions n and m, builds an n x m matrix and a
! vector of length m (both filled with ones), calls mat_vect to form the
! product and prints the elapsed time of that call.
PROGRAM matriz_vector
  IMPLICIT NONE
  INTEGER, PARAMETER :: i8 = SELECTED_INT_KIND(18)
  INTEGER :: n, m, memstat
  ! Wall-clock counters: CPU_TIME reports processor time, which does not
  ! show the speed-up of a multi-threaded mat_vect.
  INTEGER(i8) :: t_start, t_finish, t_rate
  REAL :: elapsed
  DOUBLE PRECISION, DIMENSION (:), ALLOCATABLE :: vec, a
  DOUBLE PRECISION, DIMENSION (:,:), ALLOCATABLE :: mat

  PRINT *, " M(n,m) V(m) = A(n) "
  PRINT *, " Valores para n, m >>>"
  READ(*,*) n, m

  ALLOCATE( mat(n,m), STAT=memstat )
  IF ( memstat /= 0) STOP " Erro na alocação de M "
  ALLOCATE( vec(m), STAT=memstat )
  IF ( memstat /= 0) STOP " Erro na alocação de V "
  ! The result has one entry per matrix row, i.e. n elements (not m).
  ALLOCATE( a(n), STAT=memstat )
  IF ( memstat /= 0) STOP " Erro na alocação de A "

  PRINT *, " Inicialização da matriz M e vector V"
  mat = 1
  vec = 1

  PRINT *, " M * V"
  CALL SYSTEM_CLOCK(t_start, t_rate)
  CALL mat_vect( n, m, mat, vec, a )
  CALL SYSTEM_CLOCK(t_finish)
  ! A zero rate means the processor has no clock; report 0 in that case.
  elapsed = 0.0
  IF ( t_rate > 0 ) elapsed = REAL(t_finish - t_start) / REAL(t_rate)
  PRINT *, " Tempo Mat*Vec = ", elapsed, " seg"

  PRINT *, " Finaliza"
  IF ( ALLOCATED(mat) ) DEALLOCATE( mat, STAT=memstat )
  IF ( ALLOCATED(vec) ) DEALLOCATE( vec, STAT=memstat )
  IF ( ALLOCATED(a) ) DEALLOCATE( a, STAT=memstat )
END PROGRAM matriz_vector
! Serial matrix-vector product: x(i) = sum over j of mat(i,j)*vec(j).
!   n, m : number of rows and columns of mat
!   mat  : n x m matrix
!   vec  : vector with m elements
!   x    : result vector; entries 1..n are overwritten
SUBROUTINE mat_vect( n, m, mat, vec, x )
  IMPLICIT NONE
  INTEGER, INTENT(IN) :: n, m
  DOUBLE PRECISION, DIMENSION (n,m), INTENT(IN) :: mat
  DOUBLE PRECISION, DIMENSION (m), INTENT(IN) :: vec
  ! The loop below writes x(1:n), so x must have n elements, not m.
  DOUBLE PRECISION, DIMENSION (n), INTENT(INOUT) :: x
  ! ... Local variables
  INTEGER :: i, j

  DO i = 1, n
    x(i) = 0
    DO j = 1, m
      x(i) = x(i) + mat(i,j)*vec(j)
    END DO
  END DO
END SUBROUTINE mat_vect
! OpenMP matrix-vector product, parallelised over the row index i:
! x(i) = sum over j of mat(i,j)*vec(j).
!   n, m : number of rows and columns of mat
!   mat  : n x m matrix
!   vec  : vector with m elements
!   x    : result vector; entries 1..n are overwritten
SUBROUTINE mat_vect( n, m, mat, vec, x )
  IMPLICIT NONE
  INTEGER, INTENT(IN) :: n, m
  DOUBLE PRECISION, DIMENSION (n,m), INTENT(IN) :: mat
  DOUBLE PRECISION, DIMENSION (m), INTENT(IN) :: vec
  ! The loop below writes x(1:n), so x must have n elements, not m.
  DOUBLE PRECISION, DIMENSION (n), INTENT(INOUT) :: x
  ! ... Local variables
  INTEGER :: i, j

  ! Each iteration of the i loop writes only its own x(i), so the rows can be
  ! handled by different threads; j is the inner loop counter and must be
  ! private to each thread.
  !$OMP PARALLEL DO DEFAULT(none) &
  !$OMP SHARED(n,m,mat,vec,x) PRIVATE(i,j)
  DO i = 1, n
    x(i) = 0
    DO j = 1, m
      x(i) = x(i) + mat(i,j)*vec(j)
    END DO
  END DO
  !$OMP END PARALLEL DO
END SUBROUTINE mat_vect
#pragma omp parallel for default(none) \
shared(n,m,mat,vec,x) private(i,j)
for( i = 1; i <= n; i++)
{ x(i) = 0;
for( j = 1; j <= m; j++)
x(i) = x(i) + mat(i,j)*vec(j)
}
! Variant with both loops parallelised: an outer parallel loop over the rows i
! and a nested parallel loop over the columns j.
!$OMP PARALLEL DO DEFAULT(none) &
!$OMP SHARED(n,m,mat,vec,x) PRIVATE(i,j)
DO i = 1, n
  x(i) = 0
  ! m is the bound of the inner loop, so with DEFAULT(none) it has to appear
  ! in a data-sharing clause of the inner region as well.
  !$OMP PARALLEL DO DEFAULT(none) &
  !$OMP SHARED(m,x,mat,vec,i) PRIVATE(j)
  DO j = 1, m
    ! All threads of the inner team accumulate into the same x(i); the update
    ! must be atomic to avoid a data race.
    !$OMP ATOMIC
    x(i) = x(i) + mat(i,j)*vec(j)
  END DO
  !$OMP END PARALLEL DO
END DO
!$OMP END PARALLEL DO
! Row-wise product written with the SUM intrinsic: each iteration forms the
! element-wise product of row i of mat with vec and adds it up.
!$OMP PARALLEL DO DEFAULT(none) SHARED(n,m,mat,vec,x) &
!$OMP PRIVATE(i)
DO i = 1, n
  x(i) = SUM( mat(i,:) * vec )
END DO
!$OMP END PARALLEL DO
Tempos Execução (3000 x 3000):
Série: (somatórios) 0.175
(SUM) 0.126
Paralelo (2 Threads): (somatórios) 0.196
(2 ciclos) 0.219
(SUM) 0.136
! Approximates pi with the midpoint rule: the interval [0,1] is split into n
! sub-intervals of width w and f is evaluated at the centre of each one.
! Every thread accumulates a private partial sum s over its share of the
! iterations and then adds w*s to the shared total inside a CRITICAL section.
PROGRAM pi_s
  ! Conditional compilation: the OpenMP module is only needed when OpenMP is on.
  !$ USE omp_lib
  IMPLICIT NONE
  INTEGER, PARAMETER :: i8 = SELECTED_INT_KIND(18)
  INTEGER :: n, i, n_threads
  ! Wall-clock counters: CPU_TIME reports processor time, which does not
  ! show the speed-up of a multi-threaded run.
  INTEGER(i8) :: t_start, t_finish, t_rate
  REAL :: elapsed
  DOUBLE PRECISION :: w, s, x, pi
  DOUBLE PRECISION, PARAMETER :: ZERO = 0, ONE = 1, HALF = 0.5
  DOUBLE PRECISION, EXTERNAL :: f

  READ(*,*) n
  ! w = 1/n below is undefined for n = 0 and meaningless for n < 0.
  IF ( n < 1 ) STOP " n deve ser >= 1 "
  n_threads = 1
  pi = ZERO
  w = ONE/DBLE(n)

  CALL SYSTEM_CLOCK(t_start, t_rate)
  !$OMP PARALLEL PRIVATE(x,s)
  ! n_threads is shared: let a single thread store the team size.
  !$OMP SINGLE
  !$ n_threads = omp_get_num_threads()
  !$OMP END SINGLE NOWAIT
  s = ZERO
  !$OMP DO
  DO i = 1, n
    x = w*( DBLE(i) - HALF )
    s = s + f(x)
  END DO
  !$OMP END DO
  ! pi is shared: only one thread at a time may add its partial result.
  !$OMP CRITICAL
  pi = pi + w*s
  !$OMP END CRITICAL
  !$OMP END PARALLEL
  CALL SYSTEM_CLOCK(t_finish)

  ! A zero rate means the processor has no clock; report 0 in that case.
  elapsed = 0.0
  IF ( t_rate > 0 ) elapsed = REAL(t_finish - t_start) / REAL(t_rate)

  PRINT *, n, pi
  PRINT *, " T Exec. = ", elapsed
  PRINT *, " # Threads = ", n_threads
END PROGRAM pi_s
! Integrand used for the pi approximation: f(x) = 4 / (1 + x*x).
!   x : evaluation point
!   returns the value of the integrand at x
FUNCTION f(x)
  IMPLICIT NONE
  DOUBLE PRECISION, INTENT(IN) :: x
  DOUBLE PRECISION :: f
  !...
  DOUBLE PRECISION, PARAMETER :: FOUR = 4, ONE = 1

  f = FOUR / (ONE + x*x)
END FUNCTION f