你可以这样做:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col
# Demonstration script: fit one single-regressor OLS model per independent
# variable and print all fits side by side in one summary table.

# Set seed for reproducible results
np.random.seed(0)

# Simulate toy data for the demonstration.
# NOTE: the draw order (X1..X5, then the noise term inside Y) determines the
# simulated values under the fixed seed, so it must not be reordered.
n = 100  # number of observations
X1 = np.random.normal(0, 1, n)
X2 = np.random.normal(0, 1, n)
X3 = np.random.normal(0, 1, n)
X4 = np.random.normal(0, 1, n)
X5 = np.random.normal(0, 1, n)
# Dependent variable: linear in X1..X5 plus Gaussian noise with std 2.
Y = (
    2 + 1 * X1 + 2 * X2 + 3 * X3 - 1.5 * X4 + 0.5 * X5
    + np.random.normal(0, 2, n)
)
df = pd.DataFrame({'X1': X1, 'X2': X2, 'X3': X3, 'X4': X4, 'X5': X5, 'Y': Y})
independent_vars = ['X1', 'X2', 'X3', 'X4', 'X5']

# Fit the models: one regression of Y on a constant plus a single regressor.
# Iterating over the names directly keeps the number of models tied to
# `independent_vars` instead of a separately hard-coded count.
models = []
for var_name in independent_vars:
    X = sm.add_constant(df[var_name])  # prepend the intercept column
    model = sm.OLS(df['Y'], X).fit()
    models.append(model)

# Combine the results into a single table.
# The 'R-squared' entry in info_dict adds a two-decimal R-squared row in
# addition to the R-squared / R-squared Adj. rows that the printed table
# already contains, which is why "R-squared" appears twice in the output.
results_table = summary_col(
    models,
    stars=True,
    model_names=[f'Model {i + 1}' for i in range(len(models))],
    info_dict={
        'R-squared': lambda x: f"{x.rsquared:.2f}",
        'No. observations': lambda x: f"{int(x.nobs)}",
    },
)

# Display the results table
print(results_table)
运行后会得到如下输出:
==================================================================
                 Model 1   Model 2   Model 3   Model 4   Model 5
------------------------------------------------------------------
const            1.8433*** 1.7668*** 2.1342*** 1.8303*** 1.9413***
                 (0.4335)  (0.4085)  (0.3134)  (0.4644)  (0.4454)
X1               1.4368***
                 (0.4293)
X2                         1.9801***
                           (0.3936)
X3                                   3.4614***
                                     (0.3286)
X4                                             -0.5027
                                               (0.4889)
X5                                                       0.9677**
                                                         (0.4294)
R-squared        0.1026    0.2053    0.5310    0.0107    0.0493
R-squared Adj.   0.0934    0.1972    0.5262    0.0006    0.0396
R-squared        0.10      0.21      0.53      0.01      0.05
No. observations 100       100       100       100       100
==================================================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01