这是我尝试运行代码时显示的内容:
FutureWarning:与其他归约函数(例如
、skew
)不同,kurtosis
的默认行为通常会保留其作用的轴。在 SciPy 1.11.0 中,此行为将发生变化:mode
的默认值将变为 False,将消除对其进行统计的keepdims
,并且不再接受值 None。将axis
设置为 True 或 False 以避免此警告。 实验室=模式(标签)keepdims
这是我的 Python 代码,我在尝试找到合适的解决方案时发现了一些困难:
# Importing the required modules
import numpy as np
from scipy.stats import mode
# Euclidean Distance
def eucledian(p1, p2):
dist = np.sqrt(np.sum((p1 - p2) ** 2))
return dist
# Function to calculate KNN
def predict(x_train, y, x_input, k):
op_labels = []
# Loop through the Datapoints to be classified
for item in x_input:
# Array to store distances
point_dist = []
# Loop through each training Data
for j in range(len(x_train)):
distances = eucledian(np.array(x_train[j, :]), item)
# Calculating the distance
point_dist.append(distances)
point_dist = np.array(point_dist)
# Sorting the array while preserving the index
# Keeping the first K datapoints
dist = np.argsort(point_dist)[:k]
# Labels of the K datapoints from above
labels = y[dist]
** # Majority voting
lab = mode(labels)
lab = lab.mode[0]
op_labels.append(lab)**
return op_labels
# Importing the required modules
# Importing required modules
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from numpy.random import randint
# Loading the Data
iris= load_iris()
# Store features matrix in X
X= iris.data
# Store target vector in
y = iris.target
# Creating the training Data
train_idx = xxx = randint(0, 150, 100)
X_train = X[train_idx]
y_train = y[train_idx]
# Creating the testing Data
test_idx = xxx = randint(0, 150, 50) # taking 50 random samples
X_test = X[test_idx]
y_test = y[test_idx]
# Applying our function
y_pred = predict(X_train, y_train, X_test, 7)
# Checking the accuracy
accuracy_score(y_test, y_pred)
我期待预测/准确性成为提示。
查看这篇文章.
# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)
最简单的方法就是忽略它。作者还讨论了如何修复它,所以你可能想检查一下。
KNN可以这样做。
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
# Assign colum names to the dataset
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']
# Read dataset to pandas dataframe
dataset = pd.read_csv(url, names=names)
dataset.head()
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski')
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
# Result:
precision recall f1-score support
Iris-setosa 1.00 1.00 1.00 13
Iris-versicolor 1.00 0.89 0.94 9
Iris-virginica 0.89 1.00 0.94 8
accuracy 0.97 30
macro avg 0.96 0.96 0.96 30
weighted avg 0.97 0.97 0.97 30
error = []
# Calculating error for K values between 1 and 40
for i in range(1, 40):
knn = KNeighborsClassifier(n_neighbors=i)
knn.fit(X_train, y_train)
pred_i = knn.predict(X_test)
error.append(np.mean(pred_i != y_test))
plt.figure(figsize=(12, 6))
plt.plot(range(1, 40), error, color='red', linestyle='dashed', marker='o',
markerfacecolor='blue', markersize=10)
plt.title('Error Rate K Value')
plt.xlabel('K Value')
plt.ylabel('Mean Error')