I am trying to implement linear regression with gradient descent from scratch. I have a CSV file that I want to run this linear regression on. Here is the code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('/Users/eliasbrasildesouza/Downloads/archive (2)/prices.csv')

X = df['open'].values
y = df['close'].values

import numpy as np

class GradientDescentLinearRegression:
    def __init__(self, learning_rate=0.01, max_iterations=100000, eps=1e-6):
        self.learning_rate = learning_rate
        self.max_iterations = max_iterations
        self.eps = eps

    def predict(self, X):
        return np.dot(X, self.w.T)

    def cost(self, X, y):
        y_pred = self.predict(X)
        loss = (y - y_pred)**2
        return np.mean(loss)

    def grad(self, X, y):
        """Returns the gradient vector"""
        y_pred = self.predict(X)
        d_intercept = -2*sum(y - y_pred)
        d_x = -2*sum(X[:,1:] * (y - y_pred).reshape(-1,1))
        g = np.append(np.array(d_intercept), d_x)
        return g / X.shape[0]

    def fit(self, X, y, method="standard", verbose=True):
        self.w = np.zeros(X.shape[1])
        w_hist = [self.w]
        cost_hist = [self.cost(X, y)]

        for iter in range(self.max_iterations):
            g = self.grad(X, y)

            if method == "standard":
                step = self.learning_rate * g
            else:
                raise ValueError("Method not supported.")

            self.w = self.w - step
            w_hist.append(self.w)

            J = self.cost(X, y)
            cost_hist.append(J)

            if verbose:
                print(f"Iter: {iter}, gradient: {g}, params: {self.w}, cost: {J}")

            if np.linalg.norm(w_hist[-1] - w_hist[-2]) < self.eps:
                break

        self.iterations = iter + 1
        self.w_hist = w_hist
        self.cost_hist = cost_hist
        self.method = method

        return self

reg = GradientDescentLinearRegression()
reg.fit(X, y)
The error I get is:

self.w = np.zeros(X.shape[1])
w_hist = [self.w]
cost_hist = [self.cost(X, y)]

IndexError: tuple index out of range
I tried changing X.shape[1] to X.shape[0] but ran into a similar problem. I got this code from here: https://dmitrijskass.netlify.app/2021/04/03/gradient-descent-with-linear-regression-from-scratch/
You need to reshape X into a two-dimensional array with a single column using X.reshape(-1, 1). df['open'].values comes back as a one-dimensional array, so X.shape is a one-element tuple and X.shape[1] raises the IndexError you are seeing.
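A quick check makes the shape issue visible (a minimal sketch, reusing the CSV path from the question):

import pandas as pd

df = pd.read_csv('/Users/eliasbrasildesouza/Downloads/archive (2)/prices.csv')

X = df['open'].values
print(X.shape)        # (n,) -- one-dimensional, so X.shape[1] raises IndexError

X = X.reshape(-1, 1)
print(X.shape)        # (n, 1) -- two-dimensional with a single column

With that change in place, the full script becomes: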
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('/Users/eliasbrasildesouza/Downloads/archive (2)/prices.csv')

X = df['open'].values.reshape(-1, 1)
y = df['close'].values

class GradientDescentLinearRegression:
    def __init__(self, learning_rate=0.01, max_iterations=100000, eps=1e-6):
        self.learning_rate = learning_rate
        self.max_iterations = max_iterations
        self.eps = eps

    def predict(self, X):
        return np.dot(X, self.w.T)

    def cost(self, X, y):
        y_pred = self.predict(X)
        loss = (y - y_pred)**2
        return np.mean(loss)

    def grad(self, X, y):
        """Returns the gradient vector"""
        y_pred = self.predict(X)
        d_intercept = -2*sum(y - y_pred)
        d_x = -2*sum(X[:,1:] * (y - y_pred).reshape(-1,1))
        g = np.append(np.array(d_intercept), d_x)
        return g / X.shape[0]

    def fit(self, X, y, method="standard", verbose=True):
        self.w = np.zeros(X.shape[1])
        w_hist = [self.w]
        cost_hist = [self.cost(X, y)]

        for iter in range(self.max_iterations):
            g = self.grad(X, y)

            if method == "standard":
                step = self.learning_rate * g
            else:
                raise ValueError("Method not supported.")

            self.w = self.w - step
            w_hist.append(self.w)

            J = self.cost(X, y)
            cost_hist.append(J)

            if verbose:
                print(f"Iter: {iter}, gradient: {g}, params: {self.w}, cost: {J}")

            if np.linalg.norm(w_hist[-1] - w_hist[-2]) < self.eps:
                break

        self.iterations = iter + 1
        self.w_hist = w_hist
        self.cost_hist = cost_hist
        self.method = method

        return self

reg = GradientDescentLinearRegression()
reg.fit(X, y)
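One caveat: in the blog post this class comes from, grad() appears to assume that the first column of X is a column of ones for the intercept (it computes d_intercept separately and only uses X[:,1:] for the feature weights). If you want the model to fit an intercept as well as a slope, a sketch of preparing X that way (an assumption about how you want to use the model, not something the code above already does) would be:

import numpy as np
import pandas as pd

df = pd.read_csv('/Users/eliasbrasildesouza/Downloads/archive (2)/prices.csv')

x = df['open'].values
y = df['close'].values

# Column 0 is all ones (intercept), column 1 is the feature, which matches
# how grad() splits the gradient into d_intercept and d_x.
X = np.column_stack([np.ones(len(x)), x])

reg = GradientDescentLinearRegression()
reg.fit(X, y)

Depending on the scale of the prices, you may also need a smaller learning_rate (or feature scaling) for the updates to converge.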