Gathering 3D matrix blocks of different sizes with MPI


I am trying to gather 3D matrix blocks from 8 processes onto the root process. Each process has a 3D matrix of size 7x7x7, but it sends only a part of it (a 3D block); see the figure attached to the question for an illustration. For example, the root process 0 sends a block of size 3x3x3. In the end, root process 0 assembles all the 3D blocks (top left in the figure) into the 7x7x7 global matrix A. I specify the offset as (1,1,0), which is the starting position of the 3D block inside each process's full 7x7x7 matrix.
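
For reference, the element offsets of the eight block origins inside the column-major global array A (the values hard-coded as DISPLACEMENTS in the code below) follow from i0 + 7*j0 + 49*k0, where (i0,j0,k0) is the 0-based start of a block in A. A minimal sketch, assuming the 2x2x2 rank-to-block layout implied by the size list in the code:

PROGRAM BLOCK_DISPLACEMENTS
  IMPLICIT NONE
  INTEGER, PARAMETER :: N = 7
  INTEGER :: R, I0, J0, K0
  DO R = 0, 7
    ! bit 0 of the rank picks the lower (start 0) or upper (start 3) half
    ! along dimension 1, bit 1 along dimension 2, bit 2 along dimension 3
    I0 = MERGE(3, 0, BTEST(R, 0))
    J0 = MERGE(3, 0, BTEST(R, 1))
    K0 = MERGE(3, 0, BTEST(R, 2))
    PRINT '(A,I0,A,I0)', 'rank ', R, ' displacement = ', I0 + N*J0 + N*N*K0
  END DO
END PROGRAM BLOCK_DISPLACEMENTS

This prints 0, 3, 21, 24, 147, 150, 168, 171 for ranks 0 through 7, matching the DISPLACEMENTS array used below.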

Here is my code:

PROGRAM MATRIX_3D_GATHER

USE MPI

IMPLICIT NONE

integer :: rank, size, ierr
INTEGER, PARAMETER :: NUM_PROC = 8, DIM3 = 3, UNITCONST = 12
INTEGER, DIMENSION(NUM_PROC, DIM3) :: LIST_3D_SUBARRAY_SIZES
INTEGER, DIMENSION(NUM_PROC) :: DISPLACEMENTS
INTEGER, DIMENSION(DIM3) :: GLOBALSIZES, LOCALSIZES
INTEGER, DIMENSION(DIM3) :: SIZES, SUB_SIZES, STARTS, STARTS_LOCAL
INTEGER, ALLOCATABLE, DIMENSION(:,:,:) :: A, LOCALDATA
INTEGER, ALLOCATABLE, DIMENSION(:) :: SENDCOUNTS, RECVCOUNTS, SENDDISPLS, RECVDISPLS
INTEGER :: SENDTYPE, I, J, K, START_INDX_I, START_INDX_J, START_INDX_K, END_INDX_I, END_INDX_J, END_INDX_K
INTEGER, ALLOCATABLE, DIMENSION(:) :: SEND_TYPES, RECV_TYPES

INTEGER(KIND=MPI_ADDRESS_KIND) :: LB, EXTENT
INTEGER, ALLOCATABLE, DIMENSION(:) :: RECV_BLOCK_TYPES, RESIZED_RECV_BLOCK_TYPES

CHARACTER (LEN = 11) :: STRING
CHARACTER(LEN=20) :: FMT
INTEGER :: STAT

CALL MPI_INIT(ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)

! size must be equal to NUM_PROC = 8
IF (rank == 0) THEN
  IF (size /= NUM_PROC) THEN
    WRITE(*,*) "Program must run with ", NUM_PROC, " cores."
    CALL MPI_ABORT(MPI_COMM_WORLD, 1, ierr)
  END IF  
END IF

LIST_3D_SUBARRAY_SIZES = TRANSPOSE(RESHAPE((/ &
3, 3, 3, &
4, 3, 3, &
3, 4, 3, &
4, 4, 3, &
3, 3, 4, &
4, 3, 4, &
3, 4, 4, &
4, 4, 4  &
/), (/DIM3, NUM_PROC/)))
GLOBALSIZES(:) = 7
LOCALSIZES (:) = 7
STARTS_LOCAL = (/1, 1, 0/)
!displacement, in MPI_INTEGER elements, of each block's origin inside the
!root's final 3D matrix A (converted to byte displacements below)
DISPLACEMENTS(:) = (/0, 3, 21, 24,      &
                     147, 150, 168, 171/)

!initialize data
IF ( rank == 0 ) THEN
  ALLOCATE( A(GLOBALSIZES(1), GLOBALSIZES(2), GLOBALSIZES(3)) )
ELSE
  ! non-root ranks receive nothing (their RECVCOUNTS stay 0), but the buffer
  ! passed to MPI_ALLTOALLW should still be an allocated array
  ALLOCATE( A(1, 1, 1) )
END IF
A(:,:,:) = -1
ALLOCATE( LOCALDATA (LOCALSIZES(1), LOCALSIZES(2), LOCALSIZES(3)) )
LOCALDATA(:,:,:) = -2

! mark the block this rank will send (STARTS_LOCAL is 0-based,
! Fortran array indices are 1-based)
START_INDX_I = STARTS_LOCAL(1)+1
START_INDX_J = STARTS_LOCAL(2)+1
START_INDX_K = STARTS_LOCAL(3)+1
END_INDX_I   = START_INDX_I + LIST_3D_SUBARRAY_SIZES(rank+1,1) - 1
END_INDX_J   = START_INDX_J + LIST_3D_SUBARRAY_SIZES(rank+1,2) - 1
END_INDX_K   = START_INDX_K + LIST_3D_SUBARRAY_SIZES(rank+1,3) - 1

LOCALDATA(START_INDX_I:END_INDX_I, START_INDX_J:END_INDX_J, START_INDX_K:END_INDX_K) = rank
!allocate sendcounts, senddispls, sendtypes
ALLOCATE(SENDCOUNTS(size), SENDDISPLS(size), SEND_TYPES(size))
ALLOCATE(RECVCOUNTS(size), RECVDISPLS(size), RECV_TYPES(size))
SENDCOUNTS(:) = 0
SENDDISPLS(:) = 0
RECVCOUNTS(:) = 0
RECVDISPLS(:) = 0
RECV_TYPES(:) = MPI_INTEGER
SEND_TYPES(:) = MPI_INTEGER

!create type for sending to root
SIZES(:) = LOCALSIZES(:)
SUB_SIZES(:) = LIST_3D_SUBARRAY_SIZES(rank+1,:)
STARTS = STARTS_LOCAL
CALL MPI_TYPE_CREATE_SUBARRAY(DIM3, SIZES, SUB_SIZES, STARTS,                &
                              MPI_ORDER_FORTRAN, MPI_INTEGER, SENDTYPE, ierr)
CALL MPI_TYPE_COMMIT(SENDTYPE, ierr)
!send to root
SENDCOUNTS(1) = 1
SEND_TYPES(1) = SENDTYPE

!create type for receiving from others
IF ( rank == 0 ) THEN
  ALLOCATE(RECV_BLOCK_TYPES(size), RESIZED_RECV_BLOCK_TYPES(size))
  SIZES(:) = GLOBALSIZES(:)
  STARTS = (/0, 0, 0/)
  ! need to create size = NUM_PROC = 8 block types
  DO I = 1, size
    SUB_SIZES(:) = LIST_3D_SUBARRAY_SIZES(I,:)
    CALL MPI_TYPE_CREATE_SUBARRAY(DIM3, SIZES, SUB_SIZES, STARTS,                          &
                                  MPI_ORDER_FORTRAN, MPI_INTEGER, RECV_BLOCK_TYPES(I), ierr)
    CALL MPI_TYPE_COMMIT(RECV_BLOCK_TYPES(I), ierr)
    
    LB = 0
    CALL MPI_TYPE_GET_EXTENT(MPI_INTEGER, LB, EXTENT, ierr)
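    ! resize so the type's extent is a single MPI_INTEGER; the byte
    ! displacements in RECVDISPLS can then place each incoming block at an
    ! arbitrary element offset inside the global array A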
    CALL MPI_TYPE_CREATE_RESIZED(RECV_BLOCK_TYPES(I), LB, EXTENT, &
                                 RESIZED_RECV_BLOCK_TYPES(I), ierr)
    CALL MPI_TYPE_COMMIT(RESIZED_RECV_BLOCK_TYPES(I), ierr)
    
    RECV_TYPES(I) = RESIZED_RECV_BLOCK_TYPES(I)
    ! what data root expects from others
    RECVCOUNTS(I) = 1
    RECVDISPLS(I) = DISPLACEMENTS(I)*EXTENT
  END DO
END IF


CALL MPI_ALLTOALLW(LOCALDATA, SENDCOUNTS, SENDDISPLS, SEND_TYPES, &
                   A        , RECVCOUNTS, RECVDISPLS, RECV_TYPES, &
                   MPI_COMM_WORLD, ierr)

! write 2D slices (rows x columns) of the final 3D matrix
IF ( rank == 0 ) THEN
  WRITE ( STRING, '(I0)' ) GLOBALSIZES(2)
  FMT = '('//TRIM(STRING)//'(1X,I4))'
  DO K = 1, GLOBALSIZES(3)
    WRITE ( STRING, '(I0)' ) K
    OPEN( UNIT = UNITCONST + rank, FILE = 'MATRIX_2D_SLICE_'//TRIM(STRING)//'.DAT', STATUS = 'UNKNOWN', ACTION = 'WRITE', IOSTAT = STAT)
    DO I = 1, GLOBALSIZES(1)
      WRITE(UNITCONST + rank, FMT, IOSTAT = STAT) (A(I, J, K), J = 1, GLOBALSIZES(2))
    END DO
    CLOSE(UNITCONST + rank)
  END DO
END IF

CALL MPI_FINALIZE(ierr)

END PROGRAM MATRIX_3D_GATHER

To me, the code appears to work; one can check the 2D slices of the final 3D matrix A that it writes out. I wrote the code based on @Jonathan Dursi's answers to the questions "Scatter Matrix Blocks of Different Sizes using MPI", "Using MPI_Gatherv for Fortran", and "Sending 2D arrays of 2D matrices with MPI_Gather in Fortran".

Question: I am using MPI_Alltoallw to gather the blocks (see, for example, "Scatter Matrix Blocks of Different Sizes using MPI"). I would like to know whether this can also be achieved with MPI_Gatherv, or whether anyone has a better idea of how to implement this or improve the code.

Thank you very much for taking the time to consider this question.

multidimensional-array parallel-processing fortran mpi
1 Answer

You cannot use MPI_Gatherv here, because each piece has a different geometry and needs its own strided MPI datatype. MPI_Gatherv allows blocks of different sizes, but the same datatype for all of them. You could use it to gather and assemble pieces whose size differs along one dimension only, provided the sizes along all other dimensions are the same for every piece, i.e. pieces consisting of different numbers of (hyper)slabs, which is not your case.
