I wrote a linear regression model from scratch, using the sum of squared residuals (SSR) as the loss function for gradient descent. To test it, I use linear data (y = x).
When I run the algorithm, the intercept b barely changes, and as a consequence the slope m is not computed correctly either.
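For reference, the loss I am minimizing and its partial derivatives are

SSR(m, b) = \sum_{i=1}^{n} (y_i - (m x_i + b))^2
\partial SSR / \partial m = -2 \sum_{i=1}^{n} x_i (y_i - (m x_i + b))
\partial SSR / \partial b = -2 \sum_{i=1}^{n} (y_i - (m x_i + b))

(Note that my ssr_gradient below divides each term by n, so it actually computes the gradient of SSR/n, i.e. of the mean squared error, rather than of the SSR itself.)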
%matplotlib qt5
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

X = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=12345)

class LinearRegression():

    def __init__(self):
        self.X = None
        self.y = None

    def ssr(self, m, b):  # Sum of squared residuals
        sum = 0
        for i in range(len(self.X)):
            sum += (self.y[i] - (m * self.X[i] + b)) ** 2
        return sum

    def ssr_gradient(self, m, b):
        sum_m = 0
        sum_b = 0
        n = len(self.X)
        for i in range(n):
            error = self.y[i] - (m * self.X[i] + b)
            derivative_m = -(2 / n) * self.X[i] * error  # Derivative w.r.t. m
            derivative_b = -(2 / n) * error              # Derivative w.r.t. b
            sum_m += derivative_m
            sum_b += derivative_b
        return sum_m, sum_b

    def fit(self, X, y, m, b):  # Gradient descent
        self.X = X
        self.y = y
        # Precompute the SSR surface for the 3D visualization
        M, B = np.meshgrid(np.arange(-10, 10, 0.1), np.arange(-10, 10, 0.1))
        SSR = np.zeros_like(M)
        for i in range(M.shape[0]):
            for j in range(M.shape[1]):
                SSR[i, j] = self.ssr(M[i, j], B[i, j])

        fig, axes = plt.subplots(1, 2, figsize=(12, 6))
        gd_model = fig.add_subplot(121, projection="3d", computed_zorder=False)
        lin_reg_model = axes[1]

        current_pos = (m, b, self.ssr(m, b))
        learning_rate = 0.001
        min_step_size = 0.001
        max_steps = 1000
        current_steps = 0
        while current_steps < max_steps:
            M_derivative, B_derivative = self.ssr_gradient(current_pos[0], current_pos[1])
            M_step_size, B_step_size = M_derivative * learning_rate, B_derivative * learning_rate
            if abs(M_step_size) < min_step_size or abs(B_step_size) < min_step_size:
                break
            M_new, B_new = current_pos[0] - M_step_size, current_pos[1] - B_step_size
            current_pos = (M_new, B_new, self.ssr(M_new, B_new))
            print(f"Parameters: m: {current_pos[0]}; b: {current_pos[1]}; SSR: {current_pos[2]}")
            current_steps += 1

            # Redraw both plots on every step
            x = np.arange(0, 10, 1)
            y = current_pos[0] * x + current_pos[1]
            lin_reg_model.scatter(X_train, y_train, label="Train", s=75, c="#1f77b4")
            lin_reg_model.plot(x, y)
            gd_model.plot_surface(M, B, SSR, cmap="viridis", zorder=0)
            gd_model.scatter(current_pos[0], current_pos[1], current_pos[2], c="red", zorder=1)
            gd_model.set_xlabel("Slope m")
            gd_model.set_ylabel("Intercept b")
            gd_model.set_zlabel("Sum of squared residuals")
            plt.tight_layout()
            plt.pause(0.001)
            gd_model.clear()
            lin_reg_model.clear()

        self.m = current_pos[0]
        self.b = current_pos[1]

    def predict(self, X_test):
        return self.m * X_test + self.b

lin_reg_model = LinearRegression()
lin_reg_model.fit(X_train, y_train, 1, 10)
Here is the final output with the initial values m=1 and b=10:
Parameters: m: -0.45129949840919587; b: 9.50972664859535; SSR: 145.06534359577407
This is obviously not optimal, since my data is exactly linear: the optimal parameters should be m=1 and b=0. But I cannot find the problem in my code. The algorithm prints different results depending on the initial values, even though, as long as the SSR function has a unique minimum, it should converge to the same result every time.
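As a quick sanity check (not part of my model), NumPy's closed-form least-squares fit confirms the expected optimum on the training split:

# Closed-form least-squares fit on the same training data
m_opt, b_opt = np.polyfit(X_train, y_train, 1)
print(m_opt, b_opt)  # ≈ 1.0 and ≈ 0.0, since y = x exactly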
I have tried different learning rates, but the problem persists.
Thanks to chrslg: I removed the division by n in the derivatives. After changing the learning rate and the maximum-steps parameter, the algorithm is faster and also optimizes b. Using more data instead of just a tiny toy dataset helps as well. Thanks to Dr. Snoopy: plotting the loss at every epoch was helpful.
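For anyone who hits the same issue, here is a minimal, self-contained sketch of the change: the gradient of the SSR itself, without the division by n. The learning rate and step count below are illustrative, not my exact final values.

import numpy as np

def ssr_gradient(X, y, m, b):
    # Gradient of the SSR itself -- no division by n
    error = y - (m * X + b)
    return -2 * np.sum(X * error), -2 * np.sum(error)

X = np.arange(10.0)
y = X.copy()           # the same y = x toy data
m, b = 1.0, 10.0       # same initial values as above
learning_rate = 0.001  # illustrative; needs retuning once the gradients are n times larger
for step in range(10000):
    g_m, g_b = ssr_gradient(X, y, m, b)
    m -= learning_rate * g_m
    b -= learning_rate * g_b
print(m, b)  # approaches m ≈ 1, b ≈ 0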