请参阅下面我尝试实现块三对角托马斯算法的尝试。然而,如果您运行此程序,即使对于这种非常简单的情况,与 np 直接求解 (10^-15) 相比,您也会在 TMDA 块中得到相对较大的 (10^-2) 错误。更复杂的测试用例会产生更大的错误 - 我认为错误在反向替换时开始增加。任何有关原因的帮助将不胜感激!
import numpy as np
import torch
def solve_block_tridiagonal(a, b, c, d):
N = len(b)
x = np.zeros_like(d)
# Forward elimination with explicit C* and d* storage
C_star = np.zeros_like(c)
d_star = np.zeros_like(d)
# Initial calculations for C_0* and d_0*
C_star[0] = np.linalg.solve(b[0], c[0])
d_star[0] = np.linalg.solve(b[0], d[0])
# Forward elimination
for i in range(1, N - 1):
C_star[i] = np.linalg.solve(b[i] - a[i-1] @ C_star[i-1], c[i])
d_star[i] = np.linalg.solve(b[i] - a[i-1] @ C_star[i-1], d[i] - a[i-1] @ d_star[i-1])
# Last d_star update for the last block
d_star[-1] = np.linalg.solve(b[-1] - a[-2] @ C_star[-2], d[-1] - a[-2] @ d_star[-2])
# Backward substitution
x[-1] = d_star[-1]
for i in range(N-2, -1, -1):
x[i] = d_star[i] - C_star[i] @ x[i+1]
return x
def test_block_tridiagonal_solver():
N = 4
a = np.array([
[[1, 0.5], [0.5, 1]],
[[1, 0.5], [0.5, 1]],
[[1, 0.5], [0.5, 1]]
], dtype=np.float64)
b = np.array([
[[5, 0.5], [0.5, 5]],
[[5, 0.5], [0.5, 5]],
[[5, 0.5], [0.5, 5]],
[[5, 0.5], [0.5, 5]]
], dtype=np.float64)
c = np.array([
[[1, 0.5], [0.5, 1]],
[[1, 0.5], [0.5, 1]],
[[1, 0.5], [0.5, 1]]
], dtype=np.float64)
d = np.array([
[1, 2],
[2, 3],
[3, 4],
[4, 5]
], dtype=np.float64)
x = solve_block_tridiagonal(a, b, c, d)
# Construct the equivalent full matrix A_full and right-hand side d_full
A_full = np.block([
[b[0], c[0], np.zeros((2, 2)), np.zeros((2, 2))],
[a[0], b[1], c[1], np.zeros((2, 2))],
[np.zeros((2, 2)), a[1], b[2], c[2]],
[np.zeros((2, 2)), np.zeros((2, 2)), a[2], b[3]]
])
d_full = d.flatten() # Flatten d for compatibility with the full system
# Solve using numpy's direct solve for comparison
x_np = np.linalg.solve(A_full, d_full).reshape((N, 2))
# Print the solutions for comparison
print("Solution x from block tridiagonal solver (TMDA):\n", x, "\nResidual:", torch.sum(torch.abs(torch.tensor(A_full)@torch.tensor(x).flatten() - torch.tensor(d).flatten())))
print("Solution x from direct full matrix solver:\n", x_np, "\nResidual np:", torch.sum(torch.abs(torch.tensor(A_full)@torch.tensor(x_np).flatten() - torch.tensor(d).flatten())))
# Run the test function
test_block_tridiagonal_solver()
矩阵
a
和 c
的元素数量不正确。您不能使用 a[0] 或 c[3],但要在 TDMA 中获得正确的索引,这些必须存在。
如果您将分块矩阵定义如下,那么您将从两种方法中得到相同的答案。
a = np.array([
[[1, 0.5], [0.5, 1]],
[[1, 0.5], [0.5, 1]],
[[1, 0.5], [0.5, 1]],
[[1, 0.5], [0.5, 1]]
], dtype=np.float64)
b = np.array([
[[5, 0.5], [0.5, 5]],
[[5, 0.5], [0.5, 5]],
[[5, 0.5], [0.5, 5]],
[[5, 0.5], [0.5, 5]]
], dtype=np.float64)
c = np.array([
[[1, 0.5], [0.5, 1]],
[[1, 0.5], [0.5, 1]],
[[1, 0.5], [0.5, 1]],
[[1, 0.5], [0.5, 1]]
], dtype=np.float64)
更改 C_star 中的元素数量也可以工作对于这个特定示例:
C_star = np.zeros_like(b)
但是,我很确定如果 a 和 c 没有重复的行,那么这无法解决问题。