我正在尝试从 8 个进程收集 3D 矩阵块到根进程。每个进程都有一个大小为 7x7x7 的 3D 矩阵,但它只发送其中的一部分(一个 3D 矩阵块),大家可以看下图的说明:例如,根进程 0 发送大小为 3x3x3 的矩阵块。最后,根进程 0 将所有 3D 矩阵块(图中左上角)收集为 7x7x7 的全局矩阵 A。我将偏移量指定为 (1,1,0),它表示该 3D 矩阵块在每个进程的整个 7x7x7 矩阵中的起始位置。
这是我的代码:
PROGRAM MATRIX_3D_GATHER
!! Gather differently-sized 3D blocks from NUM_PROC = 8 ranks into a single
!! 7x7x7 global matrix A on rank 0.  Each rank owns a full 7x7x7 local matrix
!! but contributes only a sub-block whose shape is listed per rank in
!! LIST_3D_SUBARRAY_SIZES, starting at zero-based offset STARTS_LOCAL inside
!! the local matrix.  MPI_ALLTOALLW is used rather than MPI_GATHERV because
!! every block has a different geometry and therefore needs its own strided
!! subarray datatype; MPI_GATHERV allows varying counts but only one datatype.
USE MPI
IMPLICIT NONE
integer :: rank, size, ierr
INTEGER, PARAMETER :: NUM_PROC = 8, DIM3 = 3, UNITCONST = 12
INTEGER, DIMENSION(NUM_PROC, DIM3) :: LIST_3D_SUBARRAY_SIZES
INTEGER, DIMENSION(NUM_PROC) :: DISPLACEMENTS
INTEGER, DIMENSION(DIM3) :: GLOBALSIZES, LOCALSIZES
INTEGER, DIMENSION(DIM3) :: SIZES, SUB_SIZES, STARTS, STARTS_LOCAL
INTEGER, ALLOCATABLE, DIMENSION(:,:,:) :: A, LOCALDATA
INTEGER, ALLOCATABLE, DIMENSION(:) :: SENDCOUNTS, RECVCOUNTS, SENDDISPLS, RECVDISPLS
INTEGER :: SENDTYPE, I, J, K, START_INDX_I, START_INDX_J, START_INDX_K, END_INDX_I, END_INDX_J, END_INDX_K
INTEGER, ALLOCATABLE, DIMENSION(:) :: SEND_TYPES, RECV_TYPES
INTEGER(KIND=MPI_ADDRESS_KIND) :: LB, EXTENT
INTEGER, ALLOCATABLE, DIMENSION(:) :: RECV_BLOCK_TYPES, RESIZED_RECV_BLOCK_TYPES
CHARACTER (LEN = 11) :: STRING
CHARACTER(LEN=20) :: FMT
INTEGER :: STAT

CALL MPI_INIT(ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
! The block table and displacements below are hard-wired for exactly 8 ranks.
IF (rank == 0) THEN
   IF (size /= NUM_PROC) THEN
      WRITE(*,*) "Program must run with ", NUM_PROC, " cores."
      CALL MPI_ABORT(MPI_COMM_WORLD, 1, ierr)
   END IF
END IF
! Shape (nx, ny, nz) of the sub-block contributed by each rank (row = rank+1).
LIST_3D_SUBARRAY_SIZES = TRANSPOSE(RESHAPE((/ &
   3, 3, 3, &
   4, 3, 3, &
   3, 4, 3, &
   4, 4, 3, &
   3, 3, 4, &
   4, 3, 4, &
   3, 4, 4, &
   4, 4, 4 &
   /), (/DIM3, NUM_PROC/)))
GLOBALSIZES(:) = 7
LOCALSIZES (:) = 7
! Zero-based start of the sub-block inside each rank's local 7x7x7 matrix.
STARTS_LOCAL = (/1, 1, 0/)
! Displacement of each rank's block inside the root's final 3D matrix,
! measured in units of MPI_INTEGER extents (converted to bytes below).
DISPLACEMENTS(:) = (/0, 3, 21, 24, &
   147, 150, 168, 171/)
! Initialize data.
IF ( rank == 0 ) THEN
   ALLOCATE( A(GLOBALSIZES(1), GLOBALSIZES(2), GLOBALSIZES(3)) )
   A(:,:,:) = -1
ELSE
   ! Non-root ranks receive nothing (recvcounts stay 0), but the receive
   ! buffer must still be an allocated actual argument to MPI_ALLTOALLW;
   ! passing an unallocated allocatable is nonconforming.
   ALLOCATE( A(1, 1, 1) )
END IF
ALLOCATE( LOCALDATA (LOCALSIZES(1), LOCALSIZES(2), LOCALSIZES(3)) )
LOCALDATA(:,:,:) = -2
! Mark exactly the sub-block that will be sent with this rank's id.
! Fortran indices are 1-based, STARTS_LOCAL is 0-based, hence the +1;
! the "-1" on the end indices is needed so the marked region has exactly
! LIST_3D_SUBARRAY_SIZES(rank+1,:) elements (the original "+size" form
! marked one extra plane in every dimension, which was never sent).
START_INDX_I = STARTS_LOCAL(1)+1
START_INDX_J = STARTS_LOCAL(2)+1
START_INDX_K = STARTS_LOCAL(3)+1
END_INDX_I = START_INDX_I + LIST_3D_SUBARRAY_SIZES(rank+1,1) - 1
END_INDX_J = START_INDX_J + LIST_3D_SUBARRAY_SIZES(rank+1,2) - 1
END_INDX_K = START_INDX_K + LIST_3D_SUBARRAY_SIZES(rank+1,3) - 1
LOCALDATA(START_INDX_I:END_INDX_I, START_INDX_J:END_INDX_J, START_INDX_K:END_INDX_K) = rank
! Allocate per-peer counts, byte displacements and datatypes for ALLTOALLW.
ALLOCATE(SENDCOUNTS(size), SENDDISPLS(size), SEND_TYPES(size))
ALLOCATE(RECVCOUNTS(size), RECVDISPLS(size), RECV_TYPES(size))
SENDCOUNTS(:) = 0
SENDDISPLS(:) = 0
RECVCOUNTS(:) = 0
RECVDISPLS(:) = 0
RECV_TYPES(:) = MPI_INTEGER
SEND_TYPES(:) = MPI_INTEGER
! Datatype describing this rank's sub-block inside its local 7x7x7 matrix.
SIZES(:) = LOCALSIZES(:)
SUB_SIZES(:) = LIST_3D_SUBARRAY_SIZES(rank+1,:)
STARTS = STARTS_LOCAL
CALL MPI_TYPE_CREATE_SUBARRAY(DIM3, SIZES, SUB_SIZES, STARTS, &
   MPI_ORDER_FORTRAN, MPI_INTEGER, SENDTYPE, ierr)
CALL MPI_TYPE_COMMIT(SENDTYPE, ierr)
! Every rank sends exactly one such block, and only to the root (peer 1).
SENDCOUNTS(1) = 1
SEND_TYPES(1) = SENDTYPE
! On the root, build one subarray type per sender describing where that
! sender's block lands in the global matrix.  Each type is resized to the
! extent of a single integer so that byte displacements can place it at an
! arbitrary element offset.
IF ( rank == 0 ) THEN
   ALLOCATE(RECV_BLOCK_TYPES(size), RESIZED_RECV_BLOCK_TYPES(size))
   SIZES(:) = GLOBALSIZES(:)
   ! The block's position is supplied via RECVDISPLS, so the subarray
   ! itself starts at the origin.
   STARTS = (/0, 0, 0/)
   DO I = 1, size
      SUB_SIZES(:) = LIST_3D_SUBARRAY_SIZES(I,:)
      CALL MPI_TYPE_CREATE_SUBARRAY(DIM3, SIZES, SUB_SIZES, STARTS, &
         MPI_ORDER_FORTRAN, MPI_INTEGER, RECV_BLOCK_TYPES(I), ierr)
      CALL MPI_TYPE_COMMIT(RECV_BLOCK_TYPES(I), ierr)
      LB = 0
      CALL MPI_TYPE_GET_EXTENT(MPI_INTEGER, LB, EXTENT, ierr)
      CALL MPI_TYPE_CREATE_RESIZED(RECV_BLOCK_TYPES(I), LB, EXTENT, &
         RESIZED_RECV_BLOCK_TYPES(I), ierr)
      CALL MPI_TYPE_COMMIT(RESIZED_RECV_BLOCK_TYPES(I), ierr)
      RECV_TYPES(I) = RESIZED_RECV_BLOCK_TYPES(I)
      ! Root expects one block from each rank, at a byte offset of
      ! element-displacement times the extent of one integer.
      RECVCOUNTS(I) = 1
      RECVDISPLS(I) = DISPLACEMENTS(I)*EXTENT
   END DO
END IF
! ALLTOALLW is the only collective that takes a distinct datatype per peer,
! which is what the per-rank block geometries require.
CALL MPI_ALLTOALLW(LOCALDATA, SENDCOUNTS, SENDDISPLS, SEND_TYPES, &
   A , RECVCOUNTS, RECVDISPLS, RECV_TYPES, &
   MPI_COMM_WORLD, ierr)
! Write 2D slices (rows x columns) of the final 3D matrix, one file per slice.
IF ( rank == 0 ) THEN
   WRITE ( STRING, '(I0)' ) GLOBALSIZES(2)
   FMT = '('//TRIM(STRING)//'(1X,I4))'
   DO K = 1, GLOBALSIZES(3)
      WRITE ( STRING, '(I0)' ) K
      OPEN( UNIT = UNITCONST + rank, FILE = 'MATRIX_2D_SLICE_'//TRIM(STRING)//'.DAT', STATUS = 'UNKNOWN', ACTION = 'WRITE', IOSTAT = STAT)
      DO I = 1, GLOBALSIZES(1)
         WRITE(UNITCONST + rank, FMT, IOSTAT = STAT) (A(I, J, K), J = 1, GLOBALSIZES(2))
      END DO
      CLOSE(UNITCONST + rank)
   END DO
END IF
! Release committed datatypes before shutting MPI down.
CALL MPI_TYPE_FREE(SENDTYPE, ierr)
IF ( rank == 0 ) THEN
   DO I = 1, size
      CALL MPI_TYPE_FREE(RESIZED_RECV_BLOCK_TYPES(I), ierr)
      CALL MPI_TYPE_FREE(RECV_BLOCK_TYPES(I), ierr)
   END DO
END IF
CALL MPI_FINALIZE(ierr)
END PROGRAM MATRIX_3D_GATHER
对我来说,这段代码看起来是有效的:可以查看输出的最终 3D 矩阵 A 的各个 2D 切片。我是参考 @Jonathan Dursi 在以下几个问题中的回答编写的代码:《使用 MPI 分散(scatter)不同大小的矩阵块》、《对 Fortran 使用 MPI_Gatherv》以及《在 Fortran 中使用 MPI_Gather 发送 2D 矩阵的 2D 数组》。问题:我目前使用 MPI_Alltoallw 来收集这些块(例如,参见《使用 MPI 分散不同大小的矩阵块》)。我想知道这是否也可以用 MPI_Gatherv 实现?或者,如果有人对如何实现或改进这段代码有更好的想法,也欢迎提出。非常感谢您花时间考虑这个问题。
此处不能使用 MPI_Gatherv,因为每个分块具有不同的几何形状,需要各自的跨步(strided)MPI 数据类型。MPI_Gatherv 允许各分块大小不同,但要求所有分块使用同一个数据类型。你可以用它来收集并拼装这样一类分块:它们只沿某一个维度尺寸不同,而沿其余所有维度的尺寸完全相同——本质上是由数量不等的(超)平板组成的分块——但这并不符合你的情况。