我正在尝试使用贝叶斯推理技术将线性模型拟合到数据。为此,我想到了使用 PyMC。当然,在训练模型后,我想在新数据上测试其性能,这就是问题发生的地方。我似乎无法设置新的数据集。大家有经验吗
显示错误的示例脚本:
import numpy as np
import pymc as pm
import arviz as az
import matplotlib.pyplot as plt
def run_model():
# Generate synthetic data
np.random.seed(42)
x = np.linspace(0, 10, 100)
a_true = 2.5 # True slope
b_true = 1.0 # True intercept
y_true = a_true * x + b_true
y = y_true + np.random.normal(0, 1, size=x.size) # Add some noise
# Split into training and test sets
x_train, x_test = x[:80], x[80:]
y_train, y_test = y[:80], y[80:]
# Define and fit the model
with pm.Model() as linear_model:
# Define x as a pm.Data variable to allow updating with pm.set_data
x_shared = pm.Data("x", x_train)
# Priors for slope and intercept
a = pm.Normal("a", mu=0, sigma=10)
b = pm.Normal("b", mu=0, sigma=10)
sigma = pm.HalfNormal("sigma", sigma=1)
# Expected value of y
mu = a * x_shared + b
# Likelihood
y_obs = pm.Normal("y_obs", mu=mu, sigma=sigma, observed=y_train)
# Sample from the posterior
trace = pm.sample(1000, tune=1000, return_inferencedata=True, chains=1)
# Predict on training data
with linear_model:
pm.set_data({"x": x_train}) # Update data to training
post_pred_train = pm.sample_posterior_predictive(trace)
# Predict on test data
with linear_model:
pm.set_data({"x": x_test}) # Update data to testing
post_pred_test = pm.sample_posterior_predictive(trace)
# Plot results
plt.figure(figsize=(10, 5))
# Plot training data
plt.scatter(x_train, y_train, c="blue", label="Training data")
plt.plot(x_train, y_true[:80], "k--", label="True function")
# Plot posterior predictive for training data
plt.plot(
x_train,
post_pred_train["y_obs"].mean(axis=0),
label="Posterior predictive (train)",
color="red",
)
# Plot test data
plt.scatter(x_test, y_test, c="green", label="Test data")
# Plot posterior predictive for test data
plt.plot(
x_test,
post_pred_test["y_obs"].mean(axis=0),
label="Posterior predictive (test)",
color="orange",
)
plt.legend()
plt.xlabel("x")
plt.ylabel("y")
plt.title("Bayesian Linear Regression with PyMC")
plt.show()
# Summary of the model parameters
print(az.summary(trace, var_names=["a", "b", "sigma"]))
# Only execute if run as the main module
if __name__ == '__main__':
run_model()
ValueError:形状不匹配:对象无法广播到单个形状。 形状 (80,) 的 arg 0 与形状 (20,) 的 arg 1 之间不匹配。
应用导致错误的节点:normal_rv{"(),()->()"}(RNG(
我不知道足够的数学来完全理解你的问题,但这些行似乎很可疑:
x = np.linspace(0, 10, 100)
[...]
x_train, x_test = x[:80], x[80:]
y_train, y_test = y[:80], y[80:]
这是唯一可能出现尺寸为 20 的错误的地方。