我建立了一个干旱预测算法。该模型的输入是未来的年份范围,例如开始年份和结束年份、月份和城市。该模型的输出是一个条形图,其 Y 轴显示干旱指数,X 轴显示年份范围和月份。但我遇到了一个错误。我不知道如何减轻这个错误。请给我一个解决方案来减轻这个错误。
这是我的模型的代码
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error
from datetime import datetime
from statsmodels.tsa.arima.model import ARIMA
# Load and preprocess data
df = pd.read_csv('Sri lanka drought variables(monthwise).csv')
# Normalize the data
df['Norm_Temperature'] = (df['Average_temperature_month'] - df['Average_temperature_month'].min()) / (df['Average_temperature_month'].max() - df['Average_temperature_month'].min())
df['Norm_Precipitation'] = (df['Average_precipitation_month'] - df['Average_precipitation_month'].min()) / (df['Average_precipitation_month'].max() - df['Average_precipitation_month'].min())
df['Norm_Evapotranspiration'] = (df['Average_evapotranspiration_month'] - df['Average_evapotranspiration_month'].min()) / (df['Average_evapotranspiration_month'].max() - df['Average_evapotranspiration_month'].min())
# Calculate CDI
df['CDI'] = (df['Norm_Precipitation'] + (1 - df['Norm_Temperature']) + df['Norm_Evapotranspiration']) / 3
# Convert Year and Month to Date
df['Date'] = pd.to_datetime(df[['Year', 'Month']].assign(day=1))
# Group data by year, month, and city
grouped_data = df.groupby(['Year', 'Month', 'City']).mean().reset_index()
# Select features and target
X = grouped_data[['Year', 'Month', 'Average_temperature_month', 'Average_precipitation_month', 'Average_evapotranspiration_month']]
y = grouped_data['CDI']
df.head()
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Initialize models
linear_reg = LinearRegression()
random_forest = RandomForestRegressor(random_state=42)
svm = SVR()
# Train the models
linear_reg.fit(X_train, y_train)
random_forest.fit(X_train, y_train)
svm.fit(X_train, y_train)
# Evaluate accuracy
def evaluate_model(model, X_test, y_test):
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
score = model.score(X_test, y_test)
return score, mae, rmse
models = {
'Linear Regression': linear_reg,
'Random Forest': random_forest,
'SVR': svm
}
results = {name: evaluate_model(model, X_test, y_test) for name, model in models.items()}
for name, (score, mae, rmse) in results.items():
print(f"Model: {name}")
print(f" Accuracy Score: {score}")
print(f" Mean Absolute Error: {mae}")
print(f" Root Mean Squared Error: {rmse}")
best_model_name = max(results, key=lambda k: results[k][0])
best_model = models[best_model_name]
print(f"Best Model: {best_model_name}")
# Forecast future values using ARIMA
def forecast_arima(data, column, order, steps):
model = ARIMA(data[column], order=order)
model_fit = model.fit()
forecast = model_fit.forecast(steps=steps)
return forecast
end_year = grouped_data['Year'].max()
forecast_steps = len(pd.date_range(start=f'{end_year+1}-01-01', end=f'{end_year+1}-12-31', freq='M'))
temperature_forecast = forecast_arima(df, 'Average_temperature_month', order=(5, 1, 0), steps=forecast_steps)
precipitation_forecast = forecast_arima(df, 'Average_precipitation_month', order=(5, 1, 0), steps=forecast_steps)
evapotranspiration_forecast = forecast_arima(df, 'Average_evapotranspiration_month', order=(5, 1, 0), steps=forecast_steps)
future_dates = pd.date_range(start=f'{end_year+1}-01-01', end=f'{end_year+1}-12-31', freq='M')
future_data = pd.DataFrame({
'Date': future_dates,
'Temperature': temperature_forecast,
'Precipitation': precipitation_forecast,
'Evapotranspiration': evapotranspiration_forecast
})
# Calculate future CDI
future_data['Norm_Temperature'] = (future_data['Temperature'] - df['Average_temperature_month'].min()) / (df['Average_temperature_month'].max() - df['Average_temperature_month'].min())
future_data['Norm_Precipitation'] = (future_data['Precipitation'] - df['Average_precipitation_month'].min()) / (df['Average_precipitation_month'].max() - df['Average_precipitation_month'].min())
future_data['Norm_Evapotranspiration'] = (future_data['Evapotranspiration'] - df['Average_evapotranspiration_month'].min()) / (df['Average_evapotranspiration_month'].max() - df['Average_evapotranspiration_month'].min())
future_data['CDI'] = (future_data['Norm_Precipitation'] + (1 - future_data['Norm_Temperature']) + future_data['Norm_Evapotranspiration']) / 3
# Function to predict drought index for a specific city
def predict_drought(model, city, start_year, end_year):
city_data = grouped_data[(grouped_data['City'] == city) & (grouped_data['Year'].between(start_year, end_year))]
X_city = city_data[['Year', 'Month', 'Average_temperature_month', 'Average_precipitation_month', 'Average_evapotranspiration_month']]
y_city = city_data['CDI']
predictions = model.predict(X_city)
city_data['Predicted_CDI'] = predictions
return city_data
# Example prediction for 'Anuradhapura' from 2030 to 2040
city = 'Anuradhapura'
future_predictions = predict_drought(best_model, city, 2030, 2040)
future_predictions['Date'] = future_predictions.apply(lambda row: datetime(int(row['Year']), int(row['Month']), 1), axis=1)
# Plot the results
plt.figure(figsize=(15, 7))
plt.bar(future_predictions['Date'], future_predictions['CDI'], color='blue')
plt.xlabel('Date')
plt.ylabel('Drought Index (CDI)')
plt.title(f'Predicted Drought Index (CDI) for {city} (2030-2040)')
plt.xticks(rotation=45)
plt.grid(axis='y')
plt.show()
错误如下,
ValueError
Cell In[87], line 3
1 # Example prediction for 'Anuradhapura' from 2030 to 2040
2 city = 'Anuradhapura'
----> 3 future_predictions = predict_drought(best_model, city, 2030, 2040)
5 if future_predictions is not None:
6 plt.figure(figsize=(15, 7))
Cell In[86], line 6, in predict_drought(model, city, start_year, end_year)
4 X_city = city_data[['Year', 'Month', 'Average_temperature_month', 'Average_precipitation_month', 'Average_evapotranspiration_month']]
5 y_city = city_data['CDI']
----> 6 predictions = model.predict(X_city)
7 city_data['Predicted_CDI'] = predictions
8 return city_data
File /lib/python3.11/site-packages/sklearn/linear_model/_base.py:386, in LinearModel.predict(self, X)
372 def predict(self, X):
373 """
374 Predict using the linear model.
375
(...)
384 Returns predicted values.
385 """
--> 386 return self._decision_function(X)
File /lib/python3.11/site-packages/sklearn/linear_model/_base.py:369, in LinearModel._decision_function(self, X)
366 def _decision_function(self, X):
367 check_is_fitted(self)
--> 369 X = self._validate_data(X, accept_sparse=["csr", "csc", "coo"], reset=False)
370 return safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_
File /lib/python3.11/site-packages/sklearn/base.py:605, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params)
603 out = X, y
604 elif not no_val_X and no_val_y:
--> 605 out = check_array(X, input_name="X", **check_params)
606 elif no_val_X and not no_val_y:
607 out = _check_y(y, **check_params)
File /lib/python3.11/site-packages/sklearn/utils/validation.py:967, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
965 n_samples = _num_samples(array)
966 if n_samples < ensure_min_samples:
--> 967 raise ValueError(
968 "Found array with %d sample(s) (shape=%s) while a"
969 " minimum of %d is required%s."
970 % (n_samples, array.shape, ensure_min_samples, context)
971 )
973 if ensure_min_features > 0 and array.ndim == 2:
974 n_features = array.shape[1]
ValueError: Found array with 0 sample(s) (shape=(0, 5)) while a minimum of 1 is required by LinearRegression.
尝试检查 predict_drought 函数以下行是否没有给出空输出。看起来好像是给空的。如果是这样,请检查您的数据集是否有满足您的条件的样本,如下行所示。还可以尝试使用 print 命令在函数内部进行调试。
city_data = grouped_data[(grouped_data['City'] == city) (grouped_data['Year'].between(start_year, end_year))]