我如何解决“ValueError:找到具有 0 个样本的数组(形状=(0, 5)),而线性回归至少需要 1 个样本。”

问题描述 投票:0回答:1

我建立了一个干旱预测算法。该模型的输入是未来的年份范围,例如开始年份和结束年份、月份和城市。该模型的输出是一个条形图,其 Y 轴显示干旱指数,X 轴显示年份范围和月份。但我遇到了一个错误。我不知道如何减轻这个错误。请给我一个解决方案来减轻这个错误。

这是我的模型的代码

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error
from datetime import datetime
from statsmodels.tsa.arima.model import ARIMA

# Load and preprocess data
df = pd.read_csv('Sri lanka drought variables(monthwise).csv')

# Normalize the data
df['Norm_Temperature'] = (df['Average_temperature_month'] - df['Average_temperature_month'].min()) / (df['Average_temperature_month'].max() - df['Average_temperature_month'].min())
df['Norm_Precipitation'] = (df['Average_precipitation_month'] - df['Average_precipitation_month'].min()) / (df['Average_precipitation_month'].max() - df['Average_precipitation_month'].min())
df['Norm_Evapotranspiration'] = (df['Average_evapotranspiration_month'] - df['Average_evapotranspiration_month'].min()) / (df['Average_evapotranspiration_month'].max() - df['Average_evapotranspiration_month'].min())

# Calculate CDI
df['CDI'] = (df['Norm_Precipitation'] + (1 - df['Norm_Temperature']) + df['Norm_Evapotranspiration']) / 3

# Convert Year and Month to Date
df['Date'] = pd.to_datetime(df[['Year', 'Month']].assign(day=1))

# Group data by year, month, and city
grouped_data = df.groupby(['Year', 'Month', 'City']).mean().reset_index()

# Select features and target
X = grouped_data[['Year', 'Month', 'Average_temperature_month', 'Average_precipitation_month', 'Average_evapotranspiration_month']]
y = grouped_data['CDI']

df.head()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize models
linear_reg = LinearRegression()
random_forest = RandomForestRegressor(random_state=42)
svm = SVR()

# Train the models
linear_reg.fit(X_train, y_train)
random_forest.fit(X_train, y_train)
svm.fit(X_train, y_train)

# Evaluate accuracy
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    score = model.score(X_test, y_test)
    return score, mae, rmse

models = {
    'Linear Regression': linear_reg,
    'Random Forest': random_forest,
    'SVR': svm
}

results = {name: evaluate_model(model, X_test, y_test) for name, model in models.items()}
for name, (score, mae, rmse) in results.items():
    print(f"Model: {name}")
    print(f"  Accuracy Score: {score}")
    print(f"  Mean Absolute Error: {mae}")
    print(f"  Root Mean Squared Error: {rmse}")

best_model_name = max(results, key=lambda k: results[k][0])
best_model = models[best_model_name]
print(f"Best Model: {best_model_name}")

# Forecast future values using ARIMA
def forecast_arima(data, column, order, steps):
    model = ARIMA(data[column], order=order)
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=steps)
    return forecast

end_year = grouped_data['Year'].max()
forecast_steps = len(pd.date_range(start=f'{end_year+1}-01-01', end=f'{end_year+1}-12-31', freq='M'))

temperature_forecast = forecast_arima(df, 'Average_temperature_month', order=(5, 1, 0), steps=forecast_steps)
precipitation_forecast = forecast_arima(df, 'Average_precipitation_month', order=(5, 1, 0), steps=forecast_steps)
evapotranspiration_forecast = forecast_arima(df, 'Average_evapotranspiration_month', order=(5, 1, 0), steps=forecast_steps)

future_dates = pd.date_range(start=f'{end_year+1}-01-01', end=f'{end_year+1}-12-31', freq='M')
future_data = pd.DataFrame({
    'Date': future_dates,
    'Temperature': temperature_forecast,
    'Precipitation': precipitation_forecast,
    'Evapotranspiration': evapotranspiration_forecast
})

# Calculate future CDI
future_data['Norm_Temperature'] = (future_data['Temperature'] - df['Average_temperature_month'].min()) / (df['Average_temperature_month'].max() - df['Average_temperature_month'].min())
future_data['Norm_Precipitation'] = (future_data['Precipitation'] - df['Average_precipitation_month'].min()) / (df['Average_precipitation_month'].max() - df['Average_precipitation_month'].min())
future_data['Norm_Evapotranspiration'] = (future_data['Evapotranspiration'] - df['Average_evapotranspiration_month'].min()) / (df['Average_evapotranspiration_month'].max() - df['Average_evapotranspiration_month'].min())

future_data['CDI'] = (future_data['Norm_Precipitation'] + (1 - future_data['Norm_Temperature']) + future_data['Norm_Evapotranspiration']) / 3

# Function to predict drought index for a specific city
def predict_drought(model, city, start_year, end_year):
    city_data = grouped_data[(grouped_data['City'] == city) & (grouped_data['Year'].between(start_year, end_year))]
    X_city = city_data[['Year', 'Month', 'Average_temperature_month', 'Average_precipitation_month', 'Average_evapotranspiration_month']]
    y_city = city_data['CDI']
    predictions = model.predict(X_city)
    city_data['Predicted_CDI'] = predictions
    return city_data

# Example prediction for 'Anuradhapura' from 2030 to 2040
city = 'Anuradhapura'
future_predictions = predict_drought(best_model, city, 2030, 2040)
future_predictions['Date'] = future_predictions.apply(lambda row: datetime(int(row['Year']), int(row['Month']), 1), axis=1)

# Plot the results
plt.figure(figsize=(15, 7))
plt.bar(future_predictions['Date'], future_predictions['CDI'], color='blue')
plt.xlabel('Date')
plt.ylabel('Drought Index (CDI)')
plt.title(f'Predicted Drought Index (CDI) for {city} (2030-2040)')
plt.xticks(rotation=45)
plt.grid(axis='y')
plt.show()

错误如下,

ValueError                                
Cell In[87], line 3
      1 # Example prediction for 'Anuradhapura' from 2030 to 2040
      2 city = 'Anuradhapura'
----> 3 future_predictions = predict_drought(best_model, city, 2030, 2040)
      5 if future_predictions is not None:
      6     plt.figure(figsize=(15, 7))

Cell In[86], line 6, in predict_drought(model, city, start_year, end_year)
      4 X_city = city_data[['Year', 'Month', 'Average_temperature_month', 'Average_precipitation_month', 'Average_evapotranspiration_month']]
      5 y_city = city_data['CDI']
----> 6 predictions = model.predict(X_city)
      7 city_data['Predicted_CDI'] = predictions
      8 return city_data

File /lib/python3.11/site-packages/sklearn/linear_model/_base.py:386, in LinearModel.predict(self, X)
    372 def predict(self, X):
    373     """
    374     Predict using the linear model.
    375 
   (...)
    384         Returns predicted values.
    385     """
--> 386     return self._decision_function(X)

File /lib/python3.11/site-packages/sklearn/linear_model/_base.py:369, in LinearModel._decision_function(self, X)
    366 def _decision_function(self, X):
    367     check_is_fitted(self)
--> 369     X = self._validate_data(X, accept_sparse=["csr", "csc", "coo"], reset=False)
    370     return safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_

File /lib/python3.11/site-packages/sklearn/base.py:605, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params)
    603         out = X, y
    604 elif not no_val_X and no_val_y:
--> 605     out = check_array(X, input_name="X", **check_params)
    606 elif no_val_X and not no_val_y:
    607     out = _check_y(y, **check_params)

File /lib/python3.11/site-packages/sklearn/utils/validation.py:967, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
    965     n_samples = _num_samples(array)
    966     if n_samples < ensure_min_samples:
--> 967         raise ValueError(
    968             "Found array with %d sample(s) (shape=%s) while a"
    969             " minimum of %d is required%s."
    970             % (n_samples, array.shape, ensure_min_samples, context)
    971         )
    973 if ensure_min_features > 0 and array.ndim == 2:
    974     n_features = array.shape[1]

ValueError: Found array with 0 sample(s) (shape=(0, 5)) while a minimum of 1 is required by LinearRegression.
python machine-learning prediction valueerror
1个回答
0
投票

尝试检查 predict_drought 函数以下行是否没有给出空输出。看起来好像是给空的。如果是这样,请检查您的数据集是否有满足您的条件的样本,如下行所示。还可以尝试使用 print 命令在函数内部进行调试。

 city_data = grouped_data[(grouped_data['City'] == city) (grouped_data['Year'].between(start_year, end_year))]
© www.soinside.com 2019 - 2024. All rights reserved.