Tuple index out of range (linear regression)


I am trying to run linear regression with gradient descent from scratch. I have a csv file that I want to run this linear regression on. Here is the code:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('/Users/eliasbrasildesouza/Downloads/archive (2)/prices.csv')
X = df['open'].values
y = df['close'].values

class GradientDescentLinearRegression:  
    def __init__(self, learning_rate=0.01, max_iterations=100000, eps=1e-6):
        self.learning_rate = learning_rate
        self.max_iterations = max_iterations
        self.eps = eps
        
    def predict(self, X):
        return np.dot(X, self.w.T)
    
    def cost(self, X, y):
        y_pred = self.predict(X)
        loss = (y - y_pred)**2
        return np.mean(loss)

    def grad(self, X, y):
        """Returns the gradient vector"""
        y_pred = self.predict(X)
        d_intercept = -2*sum(y - y_pred)                    # gradient of the intercept
        d_x = -2*sum(X[:,1:] * (y - y_pred).reshape(-1,1))  # gradient of each feature weight
        g = np.append(np.array(d_intercept), d_x)           # full gradient vector
        return g / X.shape[0]                               # average over observations

    def fit(self, X, y, method="standard", verbose=True):
        self.w = np.zeros(X.shape[1])                 # one weight per column of X
        w_hist = [self.w]                             # history of weight vectors
        cost_hist = [self.cost(X, y)]                 # history of cost values
        
        for iter in range(self.max_iterations):
            
            g = self.grad(X, y)                           # gradient at the current weights
            if method == "standard":
                step = self.learning_rate * g             # plain gradient-descent step
            else:
                raise ValueError("Method not supported.")
            self.w = self.w - step                        # update the weights
            w_hist.append(self.w)

            J = self.cost(X, y)                           # cost after the update
            cost_hist.append(J)
            
            if verbose:
                print(f"Iter: {iter}, gradient: {g}, params: {self.w}, cost: {J}")
            
            if np.linalg.norm(w_hist[-1] - w_hist[-2]) < self.eps:
                break

        self.iterations = iter + 1                      
        self.w_hist = w_hist
        self.cost_hist = cost_hist
        self.method = method
        
        return self

reg = GradientDescentLinearRegression()
reg.fit(X, y)

The error I get is:

    self.w = np.zeros(X.shape[1])

IndexError: tuple index out of range

I tried changing X.shape[1] to X.shape[0], but ran into a similar problem. I got this code from here: https://dmitrijskass.netlify.app/2021/04/03/gradient-descent-with-linear-regression-from-scratch/
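For reference, both failures can be reproduced with a toy 1-D array (hypothetical values standing in for df['open'].values):

import numpy as np

X = np.array([1.0, 2.0, 3.0])   # 1-D, like df['open'].values
print(X.shape)                  # (3,) -- the shape tuple has a single entry
# X.shape[1]                    # IndexError: tuple index out of range
# X[:, 1:]                      # IndexError: too many indices for array
#                               # (the "similar problem" hit inside grad()
#                               # after switching to X.shape[0])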

python machine-learning linear-regression gradient-descent
1 Answer

df['open'].values is a 1-D array, so X.shape is a one-element tuple and X.shape[1] raises the IndexError. You need to reshape X into a 2-D array with a single column using X.reshape(-1, 1).
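A quick sketch with toy values (hypothetical data, just to show the shapes):

import numpy as np

a = np.array([1.0, 2.0, 3.0])   # 1-D, shape (3,)
b = a.reshape(-1, 1)            # 2-D column vector, shape (3, 1)
print(a.shape, b.shape)         # (3,) (3, 1) -- b.shape[1] now exists

With that change, the full script becomes: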

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('/Users/eliasbrasildesouza/Downloads/archive (2)/prices.csv')
X = df['open'].values.reshape(-1, 1)
y = df['close'].values

class GradientDescentLinearRegression:  
    def __init__(self, learning_rate=0.01, max_iterations=100000, eps=1e-6):
        self.learning_rate = learning_rate
        self.max_iterations = max_iterations
        self.eps = eps
        
    def predict(self, X):
        return np.dot(X, self.w.T)
    
    def cost(self, X, y):
        y_pred = self.predict(X)
        loss = (y - y_pred)**2
        return np.mean(loss)

    def grad(self, X, y):
        """Returns the gradient vector"""
        y_pred = self.predict(X)
        d_intercept = -2*sum(y - y_pred)                    # gradient of the intercept
        d_x = -2*sum(X[:,1:] * (y - y_pred).reshape(-1,1))  # gradient of each feature weight
        g = np.append(np.array(d_intercept), d_x)           # full gradient vector
        return g / X.shape[0]                               # average over observations

    def fit(self, X, y, method="standard", verbose=True):
        self.w = np.zeros(X.shape[1])                 # works now that X is 2-D
        w_hist = [self.w]                             # history of weight vectors
        cost_hist = [self.cost(X, y)]                 # history of cost values
        
        for iter in range(self.max_iterations):
            
            g = self.grad(X, y)                           # gradient at the current weights
            if method == "standard":
                step = self.learning_rate * g             # plain gradient-descent step
            else:
                raise ValueError("Method not supported.")
            self.w = self.w - step                        # update the weights
            w_hist.append(self.w)

            J = self.cost(X, y)                           # cost after the update
            cost_hist.append(J)
            
            if verbose:
                print(f"Iter: {iter}, gradient: {g}, params: {self.w}, cost: {J}")
            
            if np.linalg.norm(w_hist[-1] - w_hist[-2]) < self.eps:
                break

        self.iterations = iter + 1                      
        self.w_hist = w_hist
        self.cost_hist = cost_hist
        self.method = method
        
        return self

reg = GradientDescentLinearRegression()
reg.fit(X, y)
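One caveat, separate from the IndexError: grad() computes a dedicated intercept gradient and takes the feature gradients from X[:, 1:], which suggests the original code expects the first column of X to be a column of ones. With a single reshaped column, X[:, 1:] is empty and only one weight is ever updated. A minimal sketch of that convention, assuming you want both an intercept and a slope for 'open':

import numpy as np

X = df['open'].values.reshape(-1, 1)
X = np.c_[np.ones(len(X)), X]   # shape (n, 2): intercept column of ones, then 'open'

reg = GradientDescentLinearRegression()
reg.fit(X, y)                   # self.w then holds [intercept, slope]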