我想用这个
4x4
协方差矩阵进行 EVD:
cuDoubleComplex m_cov_[16] = {
make_cuDoubleComplex(2.0301223848037391, 3.4235008150792548e-17),
make_cuDoubleComplex(1.0365842528472908, -2.1028119220635375),
make_cuDoubleComplex(2.7516978261134084, 0.95059712173944222),
make_cuDoubleComplex(-1.350157109070875, -1.1815219722269694),
make_cuDoubleComplex(1.0365842528472908, 2.1028119220635375),
make_cuDoubleComplex(2.7073859851827988, 8.7392491301242207e-17),
make_cuDoubleComplex(0.42038828834095354, 3.3356003818061963),
make_cuDoubleComplex(0.53443422887585779, -2.0017874620940646),
make_cuDoubleComplex(2.7516978261134084, -0.95059712173944222),
make_cuDoubleComplex(0.42038828834095354, -3.3356003818061963),
make_cuDoubleComplex(4.1748595442022722, 2.0100622219496206e-17),
make_cuDoubleComplex(-2.3832926547827333, -0.9692696339061293),
make_cuDoubleComplex(-1.350157109070875, 1.1815219722269694),
make_cuDoubleComplex(0.53443422887585779, 2.0017874620940646),
make_cuDoubleComplex(-2.3832926547827333, 0.9692696339061293),
make_cuDoubleComplex(1.5855784922744534, -1.5877902777669332e-17)
};
// EVD
Eigen::SelfAdjointEigenSolver<Eigen::MatrixXcd> eigensolver(m_cov_);
eigenValues_ = eigensolver.eigenvalues();
eigenVectors_ = eigensolver.eigenvectors();
通过 C++
Eigen
lib,eigen values
如下:
{{x = -2.2573766836348074e-15, y = 0},
{x = -9.709294041296323e-16, y = 0},
{x = 8.9445808618868127e-17, y = 0},
{x = 10.280850897111305, y = 0}}
4x4
eigen vectors
如下:
{{x = -0.3470929425161523, y = 0}, {x = -0.77713832029830621, y = -0}, {x = -0.15994536685820598, y = 0}, {x = 0.5, y = 0}},
{{x = -0.4597724303808105, y = 0.48181665117044714}, {x = 0.12469176895072034, y = -0.019363390950754053}, {x = -0.28597710463196591, y = 0.45689839613393701}, {x = -0.21684345356939669, y = 0.45053181535169839}},
{{x = -0.42256553981019185, y = -0.42733105533794741}, {x = 0.41437862159459787, y = 0.29068296724286291}, {x = 0.33214873805084277, y = 0.14932354148008731}, {x = 0.45697128218787925, y = 0.2029217761985285}},
{{x = -0.21707002501160269, y = 0.16642011333440535}, {x = -0.27240794554375036, y = 0.22298146715732753}, {x = 0.60350719516570406, y = -0.43247796708208042}, {x = -0.38102789443353835, y = 0.32375568514474662}}}
但是对于相同的输入,cuBLAS 返回不同的结果。
我使用下面的cuBLAS EVD功能:
CHECK_CUSOLVER(cusolverDnZheevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, N, m_eigen_vec.data(), N, m_eigen_value.data(), d_work, lwork, devInfo));
特征值和特征向量如下:
// Eigen Values
{-5.2180864461495171e-16,
-3.0110342183593702e-16,
2.8548936444323134e-16,
10.497946406463264}
// Eigen Vectors
{{x = -0.38208898741712083, y = 0.41581487741664563}, {x = 0.32043709404849968,
y = -0.4517568232420171}, {x = 0.21717632600175682, y = -0.36577789346231687}, {x = -0.33093161501032731,
y = 0.28959813033042575}, {x = -0.17547383350755327, y = -0.69642001620440552}, {x = -0.10241833368805221,
y = -0.43952076894648784}, {x = 0.047402059701005216, y = 0.14281594546174881}, {x = 0.13099303872894871,
y = 0.49065012752041015}, {x = -0.32475905997549959, y = 0.077154117638887132}, {x = -0.32253254381877083,
y = 0.2157036870035538}, {x = -0.50261510442239055, y = 0.29617238148252628}, {x = -0.58415934115419899,
y = 0.23757380765099248}, {x = -0.23214840790484073, y = 0}, {x = 0.58224779741288002, y = 0}, {x = -0.67532037122395228,
y = 0}, {x = 0.38863480971863701, y = 0}}
您可以注意到,来自
eigen values
和 Eigen Lib
的排序 cuBLAS
计算是相同的(三个 ~0 值和一个更大的值),但 eigen vectors
完全不同。
我为每个步骤编写了很多单元测试,并且我确信来自
Eigen lib
的特征值和向量是正确的,有人可以告诉我如何使用 cuBLAS
EVD 获得相同的答案吗?
提升并修改我修改的框架here,cusolver的结果似乎检查了方程A * V = V * D,其中A是问题中的输入矩阵,V是cusolver返回的特征向量矩阵, D 是 cusolver 返回的特征值的对角矩阵:
# cat t247.cu
#include <iostream>
#include <vector>
#include <cuComplex.h>
#include <cusolverDn.h>
#include <cublas_v2.h>
#include <cuda_runtime.h>
const int N = 4;
cuDoubleComplex mc[N*N] = {
make_cuDoubleComplex(2.0301223848037391, 3.4235008150792548e-17),
make_cuDoubleComplex(1.0365842528472908, -2.1028119220635375),
make_cuDoubleComplex(2.7516978261134084, 0.95059712173944222),
make_cuDoubleComplex(-1.350157109070875, -1.1815219722269694),
make_cuDoubleComplex(1.0365842528472908, 2.1028119220635375),
make_cuDoubleComplex(2.7073859851827988, 8.7392491301242207e-17),
make_cuDoubleComplex(0.42038828834095354, 3.3356003818061963),
make_cuDoubleComplex(0.53443422887585779, -2.0017874620940646),
make_cuDoubleComplex(2.7516978261134084, -0.95059712173944222),
make_cuDoubleComplex(0.42038828834095354, -3.3356003818061963),
make_cuDoubleComplex(4.1748595442022722, 2.0100622219496206e-17),
make_cuDoubleComplex(-2.3832926547827333, -0.9692696339061293),
make_cuDoubleComplex(-1.350157109070875, 1.1815219722269694),
make_cuDoubleComplex(0.53443422887585779, 2.0017874620940646),
make_cuDoubleComplex(-2.3832926547827333, 0.9692696339061293),
make_cuDoubleComplex(1.5855784922744534, -1.5877902777669332e-17)
};
// Helper function to print a complex matrix
void printMatrix(const cuDoubleComplex* A, int rows, int cols) {
for (int i = 0; i < cols; ++i) {
for (int j = 0; j < rows; ++j) {
std::cout << "(" << cuCreal(A[j * cols + i]) << ", " << cuCimag(A[j * cols + i]) << ") ";
}
std::cout << std::endl;
}
}
// Helper function to check if two complex numbers are approximately equal
bool isApproxEqual(cuDoubleComplex a, cuDoubleComplex b, double tol = 1e-6) {
return (fabs(cuCreal(a) - cuCreal(b)) < tol) && (fabs(cuCimag(a) - cuCimag(b)) < tol);
}
int main() {
// Define a 4x4 complex covariance matrix
#ifdef USE_TRANSPOSE
cuDoubleComplex *A = new cuDoubleComplex[N*N];
for (int i = 0; i < N; i++)
for (int j = 0; j < N; j++)
A[i*N+j] = mc[j*N+i];
#else
cuDoubleComplex *A = mc;
#endif
std::cout << "Original matrix A:" << std::endl;
printMatrix(A, N, N);
// cuBLAS and cuSOLVER setup
cusolverDnHandle_t cusolverH = nullptr;
cublasHandle_t cublasH = nullptr;
cudaStream_t stream = nullptr;
cusolverStatus_t csstat = cusolverDnCreate(&cusolverH);
if (csstat != CUSOLVER_STATUS_SUCCESS) std::cout << __LINE__ << " cusolver error: " << (int)csstat << std::endl;
cublasStatus_t cbstat = cublasCreate(&cublasH);
if (cbstat != CUBLAS_STATUS_SUCCESS) std::cout << __LINE__ << " cublas error: " << (int)cbstat << std::endl;
// cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
cudaStreamCreate(&stream);
csstat = cusolverDnSetStream(cusolverH, stream);
if (csstat != CUSOLVER_STATUS_SUCCESS) std::cout << __LINE__ << " cusolver error: " << (int)csstat << std::endl;
cbstat = cublasSetStream(cublasH, stream);
if (cbstat != CUBLAS_STATUS_SUCCESS) std::cout << __LINE__ << " cublas error: " << (int)cbstat << std::endl;
// Device memory for matrix A, eigenvalues, and workspace
cuDoubleComplex* d_A = nullptr;
double* d_W = nullptr;
int* devInfo = nullptr;
int lwork = 0;
cuDoubleComplex* d_work = nullptr;
cudaMalloc((void**)&d_A, sizeof(cuDoubleComplex) * N * N);
cudaMalloc((void**)&d_W, sizeof(double) * N);
cudaMalloc((void**)&devInfo, sizeof(int));
cudaMemcpy(d_A, A, sizeof(cuDoubleComplex) * N * N, cudaMemcpyHostToDevice);
// Query working space
csstat = cusolverDnZheevd_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, N, d_A, N, d_W, &lwork);
if (csstat != CUSOLVER_STATUS_SUCCESS) std::cout << __LINE__ << " cusolver error: " << (int)csstat << std::endl;
cudaMalloc((void**)&d_work, sizeof(cuDoubleComplex) * lwork);
// Compute EVD
csstat = cusolverDnZheevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER, N, d_A, N, d_W, d_work, lwork, devInfo);
if (csstat != CUSOLVER_STATUS_SUCCESS) std::cout << __LINE__ << " cusolver error: " << (int)csstat << std::endl;
// Copy results back to host
cuDoubleComplex V[N * N];
double W[N];
cudaMemcpy(V, d_A, sizeof(cuDoubleComplex) * N * N, cudaMemcpyDeviceToHost);
cudaMemcpy(W, d_W, sizeof(double) * N, cudaMemcpyDeviceToHost);
int hinfo;
cudaMemcpy(&hinfo, devInfo, sizeof(int), cudaMemcpyDeviceToHost);
if (hinfo) std::cout << "devInfo: " << hinfo << std::endl;
std::cout << "Eigenvalues (diagonal of D):" << std::endl;
for (int i = 0; i < N; ++i) {
std::cout << W[i] << " ";
}
std::cout << std::endl;
std::cout << "Eigenvectors (columns of V):" << std::endl;
printMatrix(V, N, N);
// Verify A * V = V * D using cuBLAS
cuDoubleComplex* d_V = nullptr;
cuDoubleComplex* d_D = nullptr;
cuDoubleComplex* d_DV = nullptr;
cuDoubleComplex* d_AV = nullptr;
cudaMalloc((void**)&d_V, sizeof(cuDoubleComplex) * N * N);
cudaMalloc((void**)&d_D, sizeof(cuDoubleComplex) * N * N);
cudaMalloc((void**)&d_DV, sizeof(cuDoubleComplex) * N * N);
cudaMalloc((void**)&d_AV, sizeof(cuDoubleComplex) * N * N);
cudaMemset(d_D, 0, N*N*sizeof(cuDoubleComplex));
cuDoubleComplex *ev = new cuDoubleComplex[N];
for (int i = 0; i < N; i++) ev[i] = make_cuDoubleComplex(W[i], 0.0);
for (int i = 0; i < N; i++) cudaMemcpy(d_D+(i*(N+1)), ev+i, sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0);
cuDoubleComplex beta = make_cuDoubleComplex(0.0, 0.0);
// Calculate V * D d_A now contains the eigenvectors, it is effectively V
cbstat = cublasZgemm(cublasH, CUBLAS_OP_N, CUBLAS_OP_N, N, N, N, &alpha, d_A, N, d_D, N, &beta, d_DV, N);
if (cbstat != CUBLAS_STATUS_SUCCESS) std::cout << __LINE__ << " cublas error: " << (int)cbstat << std::endl;
cuDoubleComplex* d_nA = nullptr;
cudaMalloc(&d_nA, N*N*sizeof(cuDoubleComplex));
cudaMemcpy(d_nA, A, N*N*sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);
// Calculate A * V
cbstat = cublasZgemm(cublasH, CUBLAS_OP_N, CUBLAS_OP_N, N, N, N, &alpha, d_nA, N, d_A, N, &beta, d_AV, N);
if (cbstat != CUBLAS_STATUS_SUCCESS) std::cout << __LINE__ << " cublas error: " << (int)cbstat << std::endl;
// Copy results back to host
cuDoubleComplex AV[N * N];
cuDoubleComplex DV[N * N];
cudaMemcpy(AV, d_AV, sizeof(cuDoubleComplex) * N * N, cudaMemcpyDeviceToHost);
cudaMemcpy(DV, d_DV, sizeof(cuDoubleComplex) * N * N, cudaMemcpyDeviceToHost);
std::cout << "A * V:" << std::endl;
printMatrix(AV, N, N);
std::cout << "V * D:" << std::endl;
printMatrix(DV, N, N);
// Check if A * V is approximately equal to V * D
bool equal = true;
for (int i = 0; i < N; ++i) {
for (int j = 0; j < N; ++j) {
if (!isApproxEqual(AV[i * N + j], DV[i * N + j])) {
equal = false;
break;
}
}
}
if (equal) {
std::cout << "Verification successful: A * V = V * D" << std::endl;
} else {
std::cout << "Verification failed: A * V != V * D" << std::endl;
}
// Clean up
cudaFree(d_A);
cudaFree(d_W);
cudaFree(devInfo);
cudaFree(d_work);
cudaFree(d_V);
cudaFree(d_DV);
cudaFree(d_AV);
cusolverDnDestroy(cusolverH);
cublasDestroy(cublasH);
cudaStreamDestroy(stream);
return 0;
}
# nvcc -o t247 t247.cu -lcublas -lcusolver
# compute-sanitizer ./t247
========= COMPUTE-SANITIZER
Original matrix A:
(2.03012, 3.4235e-17) (1.03658, 2.10281) (2.7517, -0.950597) (-1.35016, 1.18152)
(1.03658, -2.10281) (2.70739, 8.73925e-17) (0.420388, -3.3356) (0.534434, 2.00179)
(2.7517, 0.950597) (0.420388, 3.3356) (4.17486, 2.01006e-17) (-2.38329, 0.96927)
(-1.35016, -1.18152) (0.534434, -2.00179) (-2.38329, -0.96927) (1.58558, -1.58779e-17)
Eigenvalues (diagonal of D):
-5.21809e-16 -3.01103e-16 2.85489e-16 10.4979
Eigenvectors (columns of V):
(-0.382089, 0.415815) (0.320437, -0.451757) (0.217176, -0.365778) (-0.330932, 0.289598)
(-0.175474, -0.69642) (-0.102418, -0.439521) (0.0474021, 0.142816) (0.130993, 0.49065)
(-0.324759, 0.0771541) (-0.322533, 0.215704) (-0.502615, 0.296172) (-0.584159, 0.237574)
(-0.232148, 0) (0.582248, 0) (-0.67532, 0) (0.388635, 0)
A * V:
(-7.00371e-17, 2.32465e-16) (-4.94086e-17, 4.01301e-16) (-5.28972e-17, 4.03915e-17) (-3.4741, 3.04019)
(2.63345e-16, 3.01536e-16) (3.27777e-16, 3.2118e-16) (-8.58426e-17, -4.60878e-16) (1.37516, 5.15082)
(-1.31867e-16, 5.0219e-16) (-2.23634e-16, 2.87755e-16) (4.18528e-16, -2.85843e-16) (-6.13247, 2.49404)
(1.07082e-16, -1.23345e-16) (1.49087e-16, -1.99645e-16) (7.87887e-17, -1.82635e-17) (4.07987, -2.48075e-16)
V * D:
(1.99377e-16, -2.16976e-16) (-9.64847e-17, 1.36026e-16) (6.20015e-17, -1.04426e-16) (-3.4741, 3.04019)
(9.15638e-17, 3.63398e-16) (3.08385e-17, 1.32341e-16) (1.35328e-17, 4.07724e-17) (1.37516, 5.15082)
(1.69462e-16, -4.02597e-17) (9.71157e-17, -6.49491e-17) (-1.43491e-16, 8.45541e-17) (-6.13247, 2.49404)
(1.21137e-16, 0) (-1.75317e-16, 0) (-1.92797e-16, 0) (4.07987, 0)
Verification successful: A * V = V * D
========= ERROR SUMMARY: 0 errors
#