I am working on a use case where it is important that coefficients obtained by stochastic gradient descent land roughly in the same range as the OLS coefficients.
Below is reproducible code and data that run an OLS fit and two different stochastic-gradient-descent fits: the first on the unscaled data, the second on the scaled data.
I cannot figure out why I can't get the coefficients closer. Any help would be appreciated.
import pandas as pd
from statsmodels.formula.api import ols
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
# OLS FIT
# --------------------
# read data
df = pd.read_csv('https://gist.githubusercontent.com/alexhallam/820baf95a6f29f8d032dadd384dc4f6b/raw/b3e6a6829a8ce94349acfc775801d426fd05d08b/test_ols_sgd_data.csv')
print(df.head())
# OLS
# no constant
fit = ols('z ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14 + x15 + x16 - 1', data=df).fit()
print(fit.summary())
# ==============================================================================
#                  coef    std err          t      P>|t|      [0.025      0.975]
# ------------------------------------------------------------------------------
# x1            -0.2264      0.004    -61.238      0.000      -0.234      -0.219
# x2            44.1761    101.652      0.435      0.665    -157.475     245.827
# x3           -27.3627     49.522     -0.553      0.582    -125.602      70.876
# x4          -175.4637    406.635     -0.432      0.667    -982.118     631.191
# x5           109.4044    197.839      0.553      0.581    -283.055     501.864
# x6          -254.8337    946.742     -0.269      0.788   -2132.915    1623.248
# x7           775.7747   3247.076      0.239      0.812   -5665.550    7217.100
# x8           224.4371    400.427      0.560      0.576    -569.903    1018.777
# x9          -747.5295   1328.878     -0.563      0.575   -3383.666    1888.607
# x10          144.7102   2807.294      0.052      0.959   -5424.206    5713.627
# x11          365.9518   7526.714      0.049      0.961   -1.46e+04    1.53e+04
# x12         -540.0365    943.876     -0.572      0.568   -2412.433    1332.360
# x13         1252.3184   2172.219      0.577      0.566   -3056.780    5561.417
# x14          696.6699   2583.791      0.270      0.788   -4428.876    5822.216
# x15        -2816.9348   4121.580     -0.683      0.496    -1.1e+04    5359.172
# x16          313.9130    533.481      0.588      0.558    -744.369    1372.195
# ==============================================================================
# Omnibus:                       13.947   Durbin-Watson:                   0.834
# Prob(Omnibus):                  0.001   Jarque-Bera (JB):               19.671
# Skew:                          -0.607   Prob(JB):                     5.35e-05
# Kurtosis:                       4.601   Cond. No.                     2.14e+07
# ==============================================================================
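As a sanity check (not part of the original script), the same no-constant fit can be reproduced in closed form with numpy's least-squares solver. A minimal sketch, assuming the CSV's first column is z and the remaining columns are x1 through x16, with x1 a column of ones:
import numpy as np
# Closed-form least-squares cross-check (assumes column order z, x1..x16).
X_ols = df.iloc[:, 1:].values   # x1..x16; x1 is the all-ones column
y_ols = df.iloc[:, 0].values    # z
beta, *_ = np.linalg.lstsq(X_ols, y_ols, rcond=None)
print(beta)                     # should match the statsmodels coefficients above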
# PYTORCH FIT
# --------------------
import torch
import torch.nn as nn
scaler = StandardScaler()
# Columns 0 and 1 are z and x1; x1 is all ones, so it is skipped here and the
# model's bias term plays its role.
X_scaled = torch.tensor(scaler.fit_transform(df.iloc[:, 2:].values))
X = torch.tensor(df.iloc[:, 2:].values)
# Reshape y to (n, 1) to match the model output; a 1-D target silently
# broadcasts inside MSELoss and corrupts the loss.
y = torch.tensor(df.iloc[:, 0].values).view(-1, 1)
class LinearRegression(nn.Module):
    def __init__(self, input_dim):
        super(LinearRegression, self).__init__()
        self.linear = nn.Linear(input_dim, 1)  # input_dim features in, one output
        self.linear.weight = nn.Parameter(self.linear.weight.to(torch.float64))  # cast weight to float64
        self.linear.bias = nn.Parameter(self.linear.bias.to(torch.float64))      # cast bias to float64

    def forward(self, x):
        return self.linear(x)
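An aside, not from the original code: recent PyTorch releases accept a dtype keyword when constructing nn.Linear, which makes the manual parameter casts above unnecessary:
# Equivalent construction without the manual casts (recent PyTorch versions).
linear = nn.Linear(16, 1, dtype=torch.float64)
print(linear.weight.dtype)  # torch.float64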
epoch_data = {}

def fit_by_SGD(X, y, learning_rate=0.1, num_epochs=5000, weight_decay=0.0, convergence_threshold=1e-15, debug=True):
    input_dim = X.shape[1]               # number of columns in X
    model = LinearRegression(input_dim)  # instantiate the model
    criterion = nn.MSELoss()             # mean-squared-error loss
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    prev_loss = None
    for epoch in range(num_epochs):
        outputs = model(X.to(torch.float64))
        loss = criterion(outputs, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (epoch + 1) % 1000 == 0:
            # snapshot the bias (x1) and weights every 1000 epochs
            coeffs_i = torch.cat((model.linear.bias.data, model.linear.weight.data.view(-1)))
            epoch_data[epoch] = {
                'epoch': epoch + 1,
                'loss': f'{loss.item():.4f}',
            }
            for i, coeff in enumerate(coeffs_i):
                epoch_data[epoch][f'coeff_{i+1}'] = f'{coeff.item():.4f}'
            print(f'epoch_data: {epoch_data[epoch]}')
        # stop early once the change in loss falls below the threshold
        if epoch >= 1 and prev_loss is not None:
            if abs(loss.item() - prev_loss) < convergence_threshold:
                print(f"Convergence of {convergence_threshold} reached at epoch {epoch + 1}")
                break
        prev_loss = loss.item()
    coeffs = torch.cat((model.linear.bias.data, model.linear.weight.data.view(-1)))
    if debug:
        print('\n')
        # the bias plays the role of x1, so there are input_dim + 1 coefficients
        x_string = [f'x{i+1}' for i in range(input_dim + 1)]
        print(pd.DataFrame({'x': x_string, 'coef:': coeffs.numpy()}))
    return model  # return the fitted model for downstream use
fit_by_SGD(X, y, learning_rate=0.1, num_epochs=5000, weight_decay=0.0, convergence_threshold=1e-15, debug=True)
#        x     coef:
# 0     x1 -0.250132
# 1     x2  0.026537
# 2     x3  0.041729
# 3     x4 -0.094462
# 4     x5 -0.109195
# 5     x6 -0.096946
# 6     x7  0.138378
# 7     x8 -0.147068
# 8     x9 -0.012957
# 9    x10  0.062613
# 10   x11  0.086535
# 11   x12  0.067089
# 12   x13 -0.185289
# 13   x14 -0.080567
# 14   x15 -0.143307
# 15   x16  0.141357
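For reference (my addition): the OLS summary above reports a condition number of 2.14e+07, so full-batch gradient descent on the unscaled design matrix converges extremely slowly, and 5000 epochs may simply not be enough to reach the least-squares optimum. A direct solver shows the target the SGD fit should be approaching; a minimal sketch, assuming the X and y tensors defined above:
# Direct least-squares solve in torch as a reference point (assumes X, y above).
ones = torch.ones(X.shape[0], 1, dtype=torch.float64)
X_full = torch.cat((ones, X), dim=1)   # re-attach the x1 ones column
beta = torch.linalg.lstsq(X_full, y).solution
print(beta.view(-1))                   # should track the OLS coefficients above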
model = fit_by_SGD(X_scaled, y, learning_rate=0.1, num_epochs=5000, weight_decay=0.0, convergence_threshold=1e-15, debug=True)  # keep the fitted model; used for rescaling below
#        x     coef:
# 0     x1 -0.249526
# 1     x2  0.146164
# 2     x3  0.026521
# 3     x4 -0.068251
# 4     x5 -0.016403
# 5     x6  0.015885
# 6     x7 -0.044744
# 7     x8 -0.080269
# 8     x9  0.053531
# 9    x10 -0.166024
# 10   x11  0.084116
# 11   x12  0.027392
# 12   x13 -0.033975
# 13   x14  0.034142
# 14   x15 -0.009764
# 15   x16  0.023336
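Also worth noting (my addition): coefficients learned on standardized features are on a different scale from the OLS ones and cannot be compared directly; they first have to be mapped back using the scaler's statistics. A minimal sketch, using the model returned by the scaled fit above:
import numpy as np
# Map the standardized-fit coefficients back to the original feature scale.
w_scaled = model.linear.weight.data.view(-1).numpy()
b_scaled = model.linear.bias.data.item()
w_orig = w_scaled / scaler.scale_                                    # undo the per-feature std division
b_orig = b_scaled - np.sum(w_scaled * scaler.mean_ / scaler.scale_)  # undo the mean shift
print(b_orig, w_orig)  # now comparable to the OLS x1 (ones) coefficient and x2..x16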