cuBLAS 和 Eigen lib 的特征向量不匹配

问题描述 投票:0回答:1

我想用这个

4x4
协方差矩阵进行 EVD:

cuDoubleComplex m_cov_[16] = {
        make_cuDoubleComplex(2.0301223848037391, 3.4235008150792548e-17),
        make_cuDoubleComplex(1.0365842528472908, -2.1028119220635375),
        make_cuDoubleComplex(2.7516978261134084, 0.95059712173944222),
        make_cuDoubleComplex(-1.350157109070875, -1.1815219722269694),
        make_cuDoubleComplex(1.0365842528472908, 2.1028119220635375),
        make_cuDoubleComplex(2.7073859851827988, 8.7392491301242207e-17),
        make_cuDoubleComplex(0.42038828834095354, 3.3356003818061963),
        make_cuDoubleComplex(0.53443422887585779, -2.0017874620940646),
        make_cuDoubleComplex(2.7516978261134084, -0.95059712173944222),
        make_cuDoubleComplex(0.42038828834095354, -3.3356003818061963),
        make_cuDoubleComplex(4.1748595442022722, 2.0100622219496206e-17),
        make_cuDoubleComplex(-2.3832926547827333, -0.9692696339061293),
        make_cuDoubleComplex(-1.350157109070875, 1.1815219722269694),
        make_cuDoubleComplex(0.53443422887585779, 2.0017874620940646),
        make_cuDoubleComplex(-2.3832926547827333, 0.9692696339061293),
        make_cuDoubleComplex(1.5855784922744534, -1.5877902777669332e-17)
    };

// EVD
Eigen::SelfAdjointEigenSolver<Eigen::MatrixXcd> eigensolver(m_cov_);
eigenValues_ = eigensolver.eigenvalues();
eigenVectors_ = eigensolver.eigenvectors();

通过 C++

Eigen
lib,
eigen values
如下:

{{x = -2.2573766836348074e-15, y = 0},
 {x = -9.709294041296323e-16, y = 0}, 
 {x = 8.9445808618868127e-17, y = 0},
 {x = 10.280850897111305, y = 0}}

4x4
eigen vectors
如下:

{{x = -0.3470929425161523, y = 0}, {x = -0.77713832029830621, y = -0}, {x = -0.15994536685820598, y = 0}, {x = 0.5, y = 0}}, 
{{x = -0.4597724303808105, y = 0.48181665117044714}, {x = 0.12469176895072034, y = -0.019363390950754053}, {x = -0.28597710463196591, y = 0.45689839613393701}, {x = -0.21684345356939669, y = 0.45053181535169839}},
{{x = -0.42256553981019185, y = -0.42733105533794741}, {x = 0.41437862159459787, y = 0.29068296724286291}, {x = 0.33214873805084277, y = 0.14932354148008731}, {x = 0.45697128218787925, y = 0.2029217761985285}}, 
{{x = -0.21707002501160269, y = 0.16642011333440535}, {x = -0.27240794554375036, y = 0.22298146715732753}, {x = 0.60350719516570406, y = -0.43247796708208042}, {x = -0.38102789443353835, y = 0.32375568514474662}}}

但是对于相同的输入,cuBLAS 返回不同的结果。

我使用下面的cuBLAS EVD功能:

CHECK_CUSOLVER(cusolverDnZheevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, N, m_eigen_vec.data(), N, m_eigen_value.data(), d_work, lwork, devInfo));

特征值和特征向量如下:

// Eigen Values
{-5.2180864461495171e-16,
-3.0110342183593702e-16,
2.8548936444323134e-16, 
10.497946406463264}

// Eigen Vectors
{{x = -0.38208898741712083, y = 0.41581487741664563}, {x = 0.32043709404849968, 
    y = -0.4517568232420171}, {x = 0.21717632600175682, y = -0.36577789346231687}, {x = -0.33093161501032731, 
    y = 0.28959813033042575}, {x = -0.17547383350755327, y = -0.69642001620440552}, {x = -0.10241833368805221, 
    y = -0.43952076894648784}, {x = 0.047402059701005216, y = 0.14281594546174881}, {x = 0.13099303872894871, 
    y = 0.49065012752041015}, {x = -0.32475905997549959, y = 0.077154117638887132}, {x = -0.32253254381877083, 
    y = 0.2157036870035538}, {x = -0.50261510442239055, y = 0.29617238148252628}, {x = -0.58415934115419899, 
    y = 0.23757380765099248}, {x = -0.23214840790484073, y = 0}, {x = 0.58224779741288002, y = 0}, {x = -0.67532037122395228, 
    y = 0}, {x = 0.38863480971863701, y = 0}}

您可以注意到,来自

eigen values
Eigen Lib
的排序
cuBLAS
计算是相同的(三个 ~0 值和一个更大的值),但
eigen vectors
完全不同。

我为每个步骤编写了很多单元测试,并且我确信来自

Eigen lib
的特征值和向量是正确的,有人可以告诉我如何使用
cuBLAS
EVD 获得相同的答案吗?

c++ cuda cublas
1个回答
0
投票

提升并修改我修改的框架here,cusolver的结果似乎检查了方程A * V = V * D,其中A是问题中的输入矩阵,V是cusolver返回的特征向量矩阵, D 是 cusolver 返回的特征值的对角矩阵:

# cat t247.cu
#include <iostream>
#include <vector>
#include <cuComplex.h>
#include <cusolverDn.h>
#include <cublas_v2.h>
#include <cuda_runtime.h>
const int N = 4;
cuDoubleComplex mc[N*N] = {
        make_cuDoubleComplex(2.0301223848037391, 3.4235008150792548e-17),
        make_cuDoubleComplex(1.0365842528472908, -2.1028119220635375),
        make_cuDoubleComplex(2.7516978261134084, 0.95059712173944222),
        make_cuDoubleComplex(-1.350157109070875, -1.1815219722269694),
        make_cuDoubleComplex(1.0365842528472908, 2.1028119220635375),
        make_cuDoubleComplex(2.7073859851827988, 8.7392491301242207e-17),
        make_cuDoubleComplex(0.42038828834095354, 3.3356003818061963),
        make_cuDoubleComplex(0.53443422887585779, -2.0017874620940646),
        make_cuDoubleComplex(2.7516978261134084, -0.95059712173944222),
        make_cuDoubleComplex(0.42038828834095354, -3.3356003818061963),
        make_cuDoubleComplex(4.1748595442022722, 2.0100622219496206e-17),
        make_cuDoubleComplex(-2.3832926547827333, -0.9692696339061293),
        make_cuDoubleComplex(-1.350157109070875, 1.1815219722269694),
        make_cuDoubleComplex(0.53443422887585779, 2.0017874620940646),
        make_cuDoubleComplex(-2.3832926547827333, 0.9692696339061293),
        make_cuDoubleComplex(1.5855784922744534, -1.5877902777669332e-17)
    };

// Helper function to print a complex matrix
void printMatrix(const cuDoubleComplex* A, int rows, int cols) {
    for (int i = 0; i < cols; ++i) {
        for (int j = 0; j < rows; ++j) {
            std::cout << "(" << cuCreal(A[j * cols + i]) << ", " << cuCimag(A[j * cols + i]) << ") ";
        }
        std::cout << std::endl;
    }
}

// Helper function to check if two complex numbers are approximately equal
bool isApproxEqual(cuDoubleComplex a, cuDoubleComplex b, double tol = 1e-6) {
    return (fabs(cuCreal(a) - cuCreal(b)) < tol) && (fabs(cuCimag(a) - cuCimag(b)) < tol);
}

int main() {
    // Define a 4x4 complex covariance matrix
#ifdef USE_TRANSPOSE
    cuDoubleComplex *A = new cuDoubleComplex[N*N];
    for (int i = 0; i < N; i++)
      for (int j = 0; j < N; j++)
        A[i*N+j] = mc[j*N+i];
#else
    cuDoubleComplex *A = mc;
#endif
    std::cout << "Original matrix A:" << std::endl;
    printMatrix(A, N, N);

    // cuBLAS and cuSOLVER setup
    cusolverDnHandle_t cusolverH = nullptr;
    cublasHandle_t cublasH = nullptr;
    cudaStream_t stream = nullptr;
    cusolverStatus_t csstat = cusolverDnCreate(&cusolverH);
    if (csstat != CUSOLVER_STATUS_SUCCESS) std::cout << __LINE__ << " cusolver error: " << (int)csstat << std::endl;
    cublasStatus_t cbstat = cublasCreate(&cublasH);
    if (cbstat != CUBLAS_STATUS_SUCCESS) std::cout << __LINE__ << " cublas error: " << (int)cbstat << std::endl;
  //  cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
    cudaStreamCreate(&stream);
    csstat = cusolverDnSetStream(cusolverH, stream);
    if (csstat != CUSOLVER_STATUS_SUCCESS) std::cout << __LINE__ << " cusolver error: " << (int)csstat << std::endl;
    cbstat = cublasSetStream(cublasH, stream);
    if (cbstat != CUBLAS_STATUS_SUCCESS) std::cout << __LINE__ << " cublas error: " << (int)cbstat << std::endl;

    // Device memory for matrix A, eigenvalues, and workspace
    cuDoubleComplex* d_A = nullptr;
    double* d_W = nullptr;
    int* devInfo = nullptr;
    int lwork = 0;
    cuDoubleComplex* d_work = nullptr;

    cudaMalloc((void**)&d_A, sizeof(cuDoubleComplex) * N * N);
    cudaMalloc((void**)&d_W, sizeof(double) * N);
    cudaMalloc((void**)&devInfo, sizeof(int));

    cudaMemcpy(d_A, A, sizeof(cuDoubleComplex) * N * N, cudaMemcpyHostToDevice);

    // Query working space
    csstat = cusolverDnZheevd_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, N, d_A, N, d_W, &lwork);
    if (csstat != CUSOLVER_STATUS_SUCCESS) std::cout << __LINE__ << " cusolver error: " << (int)csstat << std::endl;
    cudaMalloc((void**)&d_work, sizeof(cuDoubleComplex) * lwork);
    // Compute EVD
    csstat = cusolverDnZheevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, N, d_A, N, d_W, d_work, lwork, devInfo);
    if (csstat != CUSOLVER_STATUS_SUCCESS) std::cout << __LINE__ << " cusolver error: " << (int)csstat << std::endl;

    // Copy results back to host
    cuDoubleComplex V[N * N];
    double W[N];
    cudaMemcpy(V, d_A, sizeof(cuDoubleComplex) * N * N, cudaMemcpyDeviceToHost);
    cudaMemcpy(W, d_W, sizeof(double) * N, cudaMemcpyDeviceToHost);
    int hinfo;
    cudaMemcpy(&hinfo, devInfo, sizeof(int), cudaMemcpyDeviceToHost);
    if (hinfo) std::cout << "devInfo: " << hinfo << std::endl;
    std::cout << "Eigenvalues (diagonal of D):" << std::endl;
    for (int i = 0; i < N; ++i) {
        std::cout << W[i] << " ";
    }
    std::cout << std::endl;

    std::cout << "Eigenvectors (columns of V):" << std::endl;
    printMatrix(V, N, N);

    // Verify A * V = V * D using cuBLAS
    cuDoubleComplex* d_V = nullptr;
    cuDoubleComplex* d_D = nullptr;
    cuDoubleComplex* d_DV = nullptr;
    cuDoubleComplex* d_AV = nullptr;

    cudaMalloc((void**)&d_V, sizeof(cuDoubleComplex) * N * N);
    cudaMalloc((void**)&d_D, sizeof(cuDoubleComplex) * N * N);
    cudaMalloc((void**)&d_DV, sizeof(cuDoubleComplex) * N * N);
    cudaMalloc((void**)&d_AV, sizeof(cuDoubleComplex) * N * N);

    cudaMemset(d_D, 0, N*N*sizeof(cuDoubleComplex));
    cuDoubleComplex *ev = new cuDoubleComplex[N];
    for (int i = 0; i < N; i++) ev[i] = make_cuDoubleComplex(W[i], 0.0);
    for (int i = 0; i < N; i++) cudaMemcpy(d_D+(i*(N+1)), ev+i, sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);

    cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0);
    cuDoubleComplex beta = make_cuDoubleComplex(0.0, 0.0);

    // Calculate V * D  d_A now contains the eigenvectors, it is effectively V
    cbstat = cublasZgemm(cublasH, CUBLAS_OP_N, CUBLAS_OP_N, N, N, N, &alpha, d_A, N, d_D, N, &beta, d_DV, N);
    if (cbstat != CUBLAS_STATUS_SUCCESS) std::cout << __LINE__ << " cublas error: " << (int)cbstat << std::endl;

    cuDoubleComplex* d_nA = nullptr;
    cudaMalloc(&d_nA, N*N*sizeof(cuDoubleComplex));
    cudaMemcpy(d_nA, A, N*N*sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
    // Calculate A * V
    cbstat = cublasZgemm(cublasH, CUBLAS_OP_N, CUBLAS_OP_N, N, N, N, &alpha, d_nA, N, d_A, N, &beta, d_AV, N);
    if (cbstat != CUBLAS_STATUS_SUCCESS) std::cout << __LINE__ << " cublas error: " << (int)cbstat << std::endl;

    // Copy results back to host
    cuDoubleComplex AV[N * N];
    cuDoubleComplex DV[N * N];
    cudaMemcpy(AV, d_AV, sizeof(cuDoubleComplex) * N * N, cudaMemcpyDeviceToHost);
    cudaMemcpy(DV, d_DV, sizeof(cuDoubleComplex) * N * N, cudaMemcpyDeviceToHost);

    std::cout << "A * V:" << std::endl;
    printMatrix(AV, N, N);

    std::cout << "V * D:" << std::endl;
    printMatrix(DV, N, N);

    // Check if A * V is approximately equal to V * D
    bool equal = true;
    for (int i = 0; i < N; ++i) {
        for (int j = 0; j < N; ++j) {
            if (!isApproxEqual(AV[i * N + j], DV[i * N + j])) {
                equal = false;
                break;
            }
        }
    }

    if (equal) {
        std::cout << "Verification successful: A * V = V * D" << std::endl;
    } else {
        std::cout << "Verification failed: A * V != V * D" << std::endl;
    }

    // Clean up
    cudaFree(d_A);
    cudaFree(d_W);
    cudaFree(devInfo);
    cudaFree(d_work);
    cudaFree(d_V);
    cudaFree(d_DV);
    cudaFree(d_AV);
    cusolverDnDestroy(cusolverH);
    cublasDestroy(cublasH);
    cudaStreamDestroy(stream);
    return 0;
}
# nvcc -o t247 t247.cu -lcublas -lcusolver
# compute-sanitizer ./t247
========= COMPUTE-SANITIZER
Original matrix A:
(2.03012, 3.4235e-17) (1.03658, 2.10281) (2.7517, -0.950597) (-1.35016, 1.18152)
(1.03658, -2.10281) (2.70739, 8.73925e-17) (0.420388, -3.3356) (0.534434, 2.00179)
(2.7517, 0.950597) (0.420388, 3.3356) (4.17486, 2.01006e-17) (-2.38329, 0.96927)
(-1.35016, -1.18152) (0.534434, -2.00179) (-2.38329, -0.96927) (1.58558, -1.58779e-17)
Eigenvalues (diagonal of D):
-5.21809e-16 -3.01103e-16 2.85489e-16 10.4979
Eigenvectors (columns of V):
(-0.382089, 0.415815) (0.320437, -0.451757) (0.217176, -0.365778) (-0.330932, 0.289598)
(-0.175474, -0.69642) (-0.102418, -0.439521) (0.0474021, 0.142816) (0.130993, 0.49065)
(-0.324759, 0.0771541) (-0.322533, 0.215704) (-0.502615, 0.296172) (-0.584159, 0.237574)
(-0.232148, 0) (0.582248, 0) (-0.67532, 0) (0.388635, 0)
A * V:
(-7.00371e-17, 2.32465e-16) (-4.94086e-17, 4.01301e-16) (-5.28972e-17, 4.03915e-17) (-3.4741, 3.04019)
(2.63345e-16, 3.01536e-16) (3.27777e-16, 3.2118e-16) (-8.58426e-17, -4.60878e-16) (1.37516, 5.15082)
(-1.31867e-16, 5.0219e-16) (-2.23634e-16, 2.87755e-16) (4.18528e-16, -2.85843e-16) (-6.13247, 2.49404)
(1.07082e-16, -1.23345e-16) (1.49087e-16, -1.99645e-16) (7.87887e-17, -1.82635e-17) (4.07987, -2.48075e-16)
V * D:
(1.99377e-16, -2.16976e-16) (-9.64847e-17, 1.36026e-16) (6.20015e-17, -1.04426e-16) (-3.4741, 3.04019)
(9.15638e-17, 3.63398e-16) (3.08385e-17, 1.32341e-16) (1.35328e-17, 4.07724e-17) (1.37516, 5.15082)
(1.69462e-16, -4.02597e-17) (9.71157e-17, -6.49491e-17) (-1.43491e-16, 8.45541e-17) (-6.13247, 2.49404)
(1.21137e-16, 0) (-1.75317e-16, 0) (-1.92797e-16, 0) (4.07987, 0)
Verification successful: A * V = V * D
========= ERROR SUMMARY: 0 errors
#
© www.soinside.com 2019 - 2024. All rights reserved.