当我尝试从头开始编写PCA,使用幂法技术来计算特征值时,只有第一个是正确的,其余的都是错误的。我做错了什么?
使用 numpy ,协方差矩阵的特征值为 [9.49506917 8.11919641 5.27687829] 而我的为 [9.495069168715409, 485.7201377095837, 25631.534012572032]。我也在分享我的代码结构。
def normalize_data(train_x, test_x=None):
scaler = StandardScaler()
train_normalized = pd.DataFrame(scaler.fit_transform(train_x), columns=train_x.columns)
if test_x is not None:
test_normalized = pd.DataFrame(scaler.transform(test_x), columns=test_x.columns)
else:
test_normalized = None
return train_normalized, test_normalized
def extract_PCA(X, n_components=3):
n = X.shape[0]
cov_matrix = (X.T @ X) / (n-1)
eigen_vectors = []
eigen_values = []
for _ in range(n_components):
# Find dominant eigen value and its corresponding eigen vector
eigen_value, eigen_vector = using_power_method_find_dominant_eigen_value(cov_matrix)
# eigen_vectors.append(eigen_vector)
eigen_values.append(eigen_value)
eigen_vector = eigen_vector.reshape(256, 1)
cov_matrix = cov_matrix - ( eigen_value * (eigen_vector @ eigen_vector.T) )
def using_power_method_find_dominant_eigen_value(mat_A, tolrence = 1e-10):
n = mat_A.shape[0]
x = np.ones(n)
prev_value = 0
prev_eigen_vector = x
# while True:
for _ in range(1000):
x = np.dot(mat_A, x)
eigen_value, x = vector_norm(x)
print(eigen_value, np.linalg.norm(prev_eigen_vector - x, 2))
if ( abs(eigen_value - prev_value) < tolrence ) and ( np.linalg.norm(prev_eigen_vector - x, 2) < tolrence):
break
prev_value = eigen_value
prev_eigen_vector = x
return eigen_value, x
def vector_norm(v):
norm_coeff = max(v)
# Step 2: Normalize the vector
normalized_v = [vi / norm_coeff for vi in v]
return norm_coeff, np.array(normalized_v)
if __name__ == "__main__":
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")
X_train, y_train = train_df.drop(columns= ['y'], axis=1), train_df['y']
X_test, y_test = test_df.drop(columns = ['y'], axis=1), test_df['y']
X_train, X_test = normalize_data(X_train, X_test)
extract_PCA(X_train)
`
幂方法是一种特征值方法,只能找到单个最大特征值(在您的情况下为 9.495....)及其相应的特征向量。
如果您想要所有特征值,您将需要不同的方法。
[ https://www.sciencedirect.com/science/article/pii/B9780123944351000223/pdfft ]