我使用以下代码进行非线性回归,并且得到截距和 20 个系数。如何解释这些系数,才能构造公式 Y = 截距 + coef_1*feat_a + ……?
# Fit a degree-2 polynomial regression on data.csv and print the learned
# coefficients labeled with the polynomial term each one multiplies, so the
# formula Y = intercept + sum(coef_i * term_i) can be read off directly.
#
# Fix: the original snippet used pd, PolynomialFeatures and LinearRegression
# without importing them (NameError when run standalone); all imports now
# sit at the top of the script.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Step 1: Load the data
data = pd.read_csv("data.csv")

# Step 2: Preprocess the data
X = data[['feat_a', 'feat_b', 'feat_c', 'feat_d', 'feat_e' ]]  # Independent variables
y = data['Y']  # Dependent variable

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=101)

# Step 3: Fit a polynomial regression model
degree = 2  # Adjust the degree of the polynomial as needed
poly_features = PolynomialFeatures(degree=degree, include_bias=False)
X_poly = poly_features.fit_transform(X_train)
model = LinearRegression()
model.fit(X_poly, y_train)

# Step 4: Evaluate the model (optional)

# Step 5: Extract the formula and parameters
intercept = model.intercept_
coefficients = model.coef_
print("coeff size{}".format(len(coefficients)))
print(coefficients)
# Label each of the 20 coefficients with its term (feat_a, ..., feat_a^2,
# feat_a feat_b, ...) — this is the mapping needed to write out the formula.
for name, coef in zip(poly_features.get_feature_names_out(X.columns), coefficients):
    print(f"{coef} * {name}")
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Toy demo: 3 named features, 100 random samples.
feature_names = ["feat_a", "feat_b", "feat_c"]
X = np.random.random(100 * 3).reshape(100, 3)
y = np.random.random(100)

# Expand the inputs to every degree-2 polynomial term; keeping the bias
# column means the model's first coefficient plays the role of the intercept.
poly = PolynomialFeatures(2, include_bias=True)
X_poly = poly.fit_transform(X)

# The bias column already supplies the constant term, so the model's own
# intercept is switched off — every number we need lives in coef_.
model = LinearRegression(fit_intercept=False)
model.fit(X_poly, y)

# Pair each learned weight with the name of the term it multiplies and
# assemble the readable formula string.
term_names = poly.get_feature_names_out(feature_names)
coef_strings = []
for name, coef in zip(term_names, model.coef_):
    coef_strings.append(f"({coef} * {name})")
print("y = " + " + ".join(coef_strings))
这将打印类似的内容(你的 coefs 会有所不同):“y = (0.6742387467644626 * 1) + (-0.7356459867692553 * feat_a) + (0.3446647936872055 * feat_b) + (-0.34891490481375154 * feat_c) + (0.40956289803329116 * feat_a^2) + (0.3308401122030401 * feat_a feat_b) + (0.39767223339891805 * feat_a feat_c) + (0.24060006772800432 * feat_b^2) + (-1.0212240828105337 * feat_b feat_c) + (0.4326304384247376 * feat_c^2)"