亲爱的大家,我试图通过谷歌搜索找到答案,但一直找不到答案。
我在 MPI Fotran 应用程序中使用 fftw,我需要逐个组件计算张量组件的 3D 数组的前向和后向变换,并在傅里叶空间中计算一些复杂的张量。 为了使 ffftw 使用的数组有用并且不花费大量时间将数据从一个数组移动到另一个数组,我想到的选项是声明一个 5 维数组:即
use, intrinsic :: iso_c_binding
call MPI_INIT( mpi_err )
call MPI_COMM_RANK( MPI_COMM_WORLD, mpi_rank, mpi_err )
call MPI_COMM_SIZE( MPI_COMM_WORLD, mpi_size, mpi_err )
integer(C_INTPTR_T), parameter :: FFTDIM=3 !fft dimension
integer(C_INTPTR_T) :: fft_L !x direction
integer(C_INTPTR_T) :: fft_M !y direction
integer(C_INTPTR_T) :: fft_N !z direction
complex(C_DOUBLE_COMPLEX), pointer :: fft_in(:,:,:,:,:), fft_out(:,:,:,:,:)
type(C_PTR) :: fft_plan_fwd, fft_plan_bkw, fft_datapointer
integer(C_INTPTR_T) :: fft_alloc_local, fft_local_n0, fft_local_0_start
include 'mpif.h'
include 'fftw3-mpi.f03'
call fftw_mpi_init
fft_L=problem_dim(1)
fft_M=problem_dim(2)
fft_N=problem_dim(3)
! CALCULATE LOCAL SIZE OF FFT VARIABLE FOR EACH COMPOENNT
fft_alloc_local = fftw_mpi_local_size_3d(fft_N,fft_M,fft_L, MPI_COMM_WORLD, &
fft_local_n0, fft_local_0_start)
! allocate data pointer
fft_datapointer = fftw_alloc_complex(9*int(fft_alloc_local,C_SIZE_T))
! link pointers to the same array
call c_f_pointer(fft_datapointer, fft_in, [ FFTDIM, FFTDIM, fft_L, fft_M, fft_local_n0])
call c_f_pointer(fft_datapointer, fft_out, [ FFTDIM, FFTDIM, fft_L, fft_M, fft_local_n0])
! create plans
fft_plan_fwd = fftw_MPI_plan_dft_3d(fft_N, fft_M, fft_L, & !dimension
fft_in(1,1,:,:,:), fft_out(1,1,:,:,:), & !inpu, output
MPI_COMM_WORLD, FFTW_FORWARD, FFTW_MEASURE)
fft_plan_bkw = fftw_MPI_plan_dft_3d(fft_N, fft_M, fft_L, & !dimension
fft_in(1,1,:,:,:), fft_out(1,1,:,:,:), & !inpu, output
MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_MEASURE)
现在,如果我使用这段代码并且处理器的数量是 2 的倍数(2,4,8...),则一切正常,但如果我使用 6,应用程序将给出错误。我该如何解决这个问题? 您是否有更好的策略而不是分配 5d 数组并且不移动到许多数据?
我利用 fffw_mpi_plan_many 接口找到了这个问题的解决方案 执行此计算的代码如下。它利用 MPI 功能计算张量分量 (11,12,...) 的 3D(LxMxN) 复数到复数变换。第三维度(N)的范围必须能被所使用的核心数量整除
program test_fftw
use, intrinsic :: iso_c_binding
implicit none
include 'mpif.h'
include 'fftw3-mpi.f03'
integer(C_INTPTR_T) :: L = 8 ! extent of x data
integer(C_INTPTR_T) :: M = 8 ! extent of y data
integer(C_INTPTR_T) :: N = 192 ! extent of z data
integer(C_INTPTR_T) :: FFT_12_DIM = 3 ! tensor dimension
integer(C_INTPTR_T) :: ll, mm, nn, i, j
complex(C_DOUBLE_COMPLEX) :: fout
! many plan data variables
integer(C_INTPTR_T) :: howmany=9 ! numer of eleemnt of the tensor
integer :: rank=3 ! rank of the transform
integer(C_INTPTR_T), dimension(3) :: fft_dims ! array containing data extent
integer(C_INTPTR_T) :: alloc_local_many, fft_local_n0, fft_local_0_start
complex(C_DOUBLE_COMPLEX), pointer :: fft_data(:,:,:,:,:)
type(C_PTR) ::fft_datapointer, plan_many
integer :: ierr, myid, nproc
! Initialize
call mpi_init(ierr)
call MPI_COMM_SIZE(MPI_COMM_WORLD, nproc, ierr)
call MPI_COMM_RANK(MPI_COMM_WORLD, myid, ierr)
call fftw_mpi_init()
! write data dimenion in reversed order
fft_dims(3) = L
fft_dims(2) = M
fft_dims(1) = N
! use of alloc many
alloc_local_many = fftw_mpi_local_size_many(rank, & ! rank of the transform in this case 3
fft_dims, & ! array containing data dimension in reversed order
howmany, & ! numebr of transform to compute in this case 3x3=9
FFTW_MPI_DEFAULT_BLOCK, & !default block size
MPI_COMM_WORLD, & ! mpi communicator
fft_local_n0, & ! local numebr of slice by core
fft_local_0_start) ! local shift on the last dimension
fft_datapointer = fftw_alloc_complex(alloc_local_many) ! allocate aligned memory for the data
! associate data pointer with allocated memory: note natural order
call c_f_pointer(fft_datapointer, fft_data, [FFT_12_DIM,FFT_12_DIM,L,M, fft_local_n0])
! create the plan for many inplace multidimensional transform
plan_many = fftw_mpi_plan_many_dft( &
rank , fft_dims, howmany, &
FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, &
fft_data, fft_data, &
MPI_COMM_WORLD, FFTW_FORWARD, FFTW_ESTIMATE )
! initialize data to some function my_function(i,j)
do nn = 1, fft_local_n0
do mm = 1, M
do ll = 1, L
do i = 1, FFT_12_DIM
do j = 1, FFT_12_DIM
fout = ll*mm*nn*i*j
fft_data(i,j,ll,mm,nn) = fout
end do
end do
end do
end do
enddo
call fftw_mpi_execute_dft(plan_many, fft_data, fft_data)!
call fftw_destroy_plan(plan_many)
call fftw_mpi_cleanup()
call fftw_free(fft_datapointer)
call mpi_finalize(ierr)
end program test_fftw
谢谢大家的帮助!!