I want to plot a confusion matrix to visualize the classifier's performance, but it shows only the numbers for the labels, not the labels themselves:
from sklearn.metrics import confusion_matrix
import numpy as np
import pylab as pl
y_test=['business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business']
pred = np.array(['health', 'business', 'business', 'business', 'business',
                 'business', 'health', 'health', 'business', 'business', 'business',
                 'business', 'business', 'business', 'business', 'business',
                 'health', 'health', 'business', 'health'])
cm = confusion_matrix(y_test, pred)
pl.matshow(cm)
pl.title('Confusion matrix of the classifier')
pl.colorbar()
pl.show()
How can I add the labels (health, business, etc.) to the confusion matrix?
UPDATE: Newer scikit-learn versions ship ConfusionMatrixDisplay, which takes care of the labels for you (see the answers below).
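For reference, a minimal sketch of that route (it assumes scikit-learn >= 1.0, where from_predictions is available; the tick labels default to the label values found in the data):
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# builds the confusion matrix and the labelled plot in one call
ConfusionMatrixDisplay.from_predictions(y_test, pred)
plt.show()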
Old answer: you can use seaborn.heatmap:
import seaborn as sns
import matplotlib.pyplot as plt

ax = plt.subplot()
sns.heatmap(cm, annot=True, fmt='g', ax=ax)  # annot=True to annotate cells, fmt='g' to disable scientific notation

# labels, title and ticks
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
ax.xaxis.set_ticklabels(['business', 'health'])
ax.yaxis.set_ticklabels(['business', 'health'])  # same order as the rows of cm returned by confusion_matrix
As hinted in this question, you have to "open up" the lower-level artist API by storing the figure and axis objects returned by the matplotlib functions you call (the fig, ax and cax variables below). You can then replace the default x- and y-axis ticks using set_xticklabels/set_yticklabels:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

labels = ['business', 'health']
cm = confusion_matrix(y_test, pred, labels=labels)
print(cm)
fig = plt.figure()
ax = fig.add_subplot(111)
cax = ax.matshow(cm)
plt.title('Confusion matrix of the classifier')
fig.colorbar(cax)
ax.set_xticklabels([''] + labels)
ax.set_yticklabels([''] + labels)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()
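On recent Matplotlib versions, calling set_xticklabels without pinning the tick positions first triggers a warning and can misplace the labels. A sketch of the more explicit variant, assuming one tick per class at the integer cell positions that matshow uses:
import numpy as np

ticks = np.arange(len(labels))
ax.set_xticks(ticks)
ax.set_xticklabels(labels)
ax.set_yticks(ticks)
ax.set_yticklabels(labels)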
Note that I passed the labels list to the confusion_matrix function to make sure it is sorted correctly, matching the ticks.
The result is the following figure:
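As an aside on why the labels argument matters: unless you pass labels, confusion_matrix orders rows and columns by the sorted unique label values, so your tick labels can silently end up mismatched. A quick check with the question's data:
from sklearn.metrics import confusion_matrix

# default ordering: sorted unique labels -> ['business', 'health']
print(confusion_matrix(y_test, pred))
# explicit ordering: rows/columns follow the list you pass in
print(confusion_matrix(y_test, pred, labels=['health', 'business']))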
To add to @akilat90's update about sklearn.metrics.plot_confusion_matrix:
You can use the ConfusionMatrixDisplay class within sklearn.metrics directly, with no need to pass a classifier to plot_confusion_matrix. It also has the display_labels argument, which lets you specify the labels displayed in the plot as desired.
The constructor of ConfusionMatrixDisplay doesn't provide much room for additional customization of the plot, but you can access the matplotlib axes object via the ax_ attribute after calling its plot() method. I have added a second example showing this.
I found it annoying to have to re-run a classifier over a lot of data just to produce the plot with plot_confusion_matrix. I am producing other plots from the predicted data, so I don't want to waste my time re-predicting every time. This was an easy fix for that problem as well.
Example:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
cm = confusion_matrix(y_true, y_preds, normalize='all')
cmd = ConfusionMatrixDisplay(cm, display_labels=['business','health'])
cmd.plot()
Example using ax_:
cm = confusion_matrix(y_true, y_preds, normalize='all')
cmd = ConfusionMatrixDisplay(cm, display_labels=['business','health'])
cmd.plot()
cmd.ax_.set(xlabel='Predicted', ylabel='True')
I found a function that can plot the confusion matrix generated from sklearn.
import numpy as np


def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    given a sklearn confusion matrix (cm), make a nice plot

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix

    target_names: given classification classes such as [0, 1, 2]
                  the class names, for example: ['high', 'medium', 'low']

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import itertools

    accuracy = np.trace(cm) / np.sum(cm).astype('float')
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if normalize:
            plt.text(j, i, "{:0.4f}".format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")
        else:
            plt.text(j, i, "{:,}".format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    plt.show()
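A usage sketch with the question's data (the label names are my assumption, matching the example above; normalize=False because the sample y_test has no true 'health' rows, so row-normalizing would divide by zero):
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, pred, labels=['business', 'health'])
plot_confusion_matrix(cm=cm,
                      normalize=False,
                      target_names=['business', 'health'],
                      title='Confusion matrix of the classifier')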
from sklearn import model_selection
test_size = 0.33
seed = 7
X_train, X_test, y_train, y_test = model_selection.train_test_split(feature_vectors, y, test_size=test_size, random_state=seed)
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(X_train, y_train)
result = model.score(X_test, y_test)
print("Accuracy: %.3f%%" % (result*100.0))
y_pred = model.predict(X_test)
print("F1 Score: ", f1_score(y_test, y_pred, average="macro"))
print("Precision Score: ", precision_score(y_test, y_pred, average="macro"))
print("Recall Score: ", recall_score(y_test, y_pred, average="macro"))
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix


def cm_analysis(y_true, y_pred, labels, ymap=None, figsize=(10, 10)):
    """
    Generate matrix plot of confusion matrix with pretty annotations.

    args:
      y_true:  true label of the data, with shape (nsamples,)
      y_pred:  prediction of the data, with shape (nsamples,)
      labels:  string array, name the order of class labels in the confusion matrix.
               use `clf.classes_` if using scikit-learn models.
               with shape (nclass,).
      ymap:    dict: any -> string, length == nclass.
               if not None, map the labels & ys to more understandable strings.
               Caution: original y_true, y_pred and labels must align.
      figsize: the size of the figure plotted.
    """
    if ymap is not None:
        # change category codes or labels to new labels
        y_pred = [ymap[yi] for yi in y_pred]
        y_true = [ymap[yi] for yi in y_true]
        labels = [ymap[yi] for yi in labels]
    # calculate a confusion matrix with the new labels
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    # calculate row sums (for calculating % & plot annotations)
    cm_sum = np.sum(cm, axis=1, keepdims=True)
    # calculate proportions
    cm_perc = cm / cm_sum.astype(float) * 100
    # empty array for holding annotations for each cell in the heatmap
    annot = np.empty_like(cm).astype(str)
    # get the dimensions
    nrows, ncols = cm.shape
    # cycle over cells and create annotations for each cell
    for i in range(nrows):
        for j in range(ncols):
            # get the count for the cell
            c = cm[i, j]
            # get the percentage for the cell
            p = cm_perc[i, j]
            if i == j:
                # row sum as a plain scalar
                s = cm_sum[i][0]
                # convert the proportion, count, and row sum to a string with pretty formatting
                annot[i, j] = '%.1f%%\n%d/%d' % (p, c, s)
            elif c == 0:
                annot[i, j] = ''
            else:
                annot[i, j] = '%.1f%%\n%d' % (p, c)
    # convert the array to a dataframe. To plot by proportion instead of number, use cm_perc in the DataFrame instead of cm
    cm = pd.DataFrame(cm, index=labels, columns=labels)
    cm.index.name = 'Actual'
    cm.columns.name = 'Predicted'
    # create empty figure with a specified size
    fig, ax = plt.subplots(figsize=figsize)
    # plot the data using the Pandas dataframe. To change the color map, add cmap=..., e.g. cmap = 'rocket_r'
    sns.heatmap(cm, annot=annot, fmt='', ax=ax)
    # plt.savefig(filename)  # uncomment (and pass a filename) to save the figure to disk
    plt.show()


cm_analysis(y_test, y_pred, model.classes_, ymap=None, figsize=(10, 10))
This uses https://gist.github.com/hitvoice/36cf44689065ca9b927431546381a3f7
Note that if you use rocket_r it will reverse the colors, and somehow it looks more natural and better, such as below:
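That is a one-line change to the heatmap call inside cm_analysis above ('rocket_r' is the reversed seaborn default colormap):
sns.heatmap(cm, annot=annot, fmt='', ax=ax, cmap='rocket_r')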
You might be interested in https://github.com/pandas-ml/pandas-ml/, which implements a Python Pandas implementation of the confusion matrix.
Some features:
Here is an example:
In [1]: from pandas_ml import ConfusionMatrix
In [2]: import matplotlib.pyplot as plt
In [3]: y_test = ['business', 'business', 'business', 'business', 'business',
'business', 'business', 'business', 'business', 'business',
'business', 'business', 'business', 'business', 'business',
'business', 'business', 'business', 'business', 'business']
In [4]: y_pred = ['health', 'business', 'business', 'business', 'business',
'business', 'health', 'health', 'business', 'business', 'business',
'business', 'business', 'business', 'business', 'business',
'health', 'health', 'business', 'health']
In [5]: cm = ConfusionMatrix(y_test, y_pred)
In [6]: cm
Out[6]:
Predicted business health __all__
Actual
business 14 6 20
health 0 0 0
__all__ 14 6 20
In [7]: cm.plot()
Out[7]: <matplotlib.axes._subplots.AxesSubplot at 0x1093cf9b0>
In [8]: plt.show()
In [9]: cm.print_stats()
Confusion Matrix:
Predicted business health __all__
Actual
business 14 6 20
health 0 0 0
__all__ 14 6 20
Overall Statistics:
Accuracy: 0.7
95% CI: (0.45721081772371086, 0.88106840959427235)
No Information Rate: ToDo
P-Value [Acc > NIR]: 0.608009812201
Kappa: 0.0
Mcnemar's Test P-Value: ToDo
Class Statistics:
Classes business health
Population 20 20
P: Condition positive 20 0
N: Condition negative 0 20
Test outcome positive 14 6
Test outcome negative 6 14
TP: True Positive 14 0
TN: True Negative 0 14
FP: False Positive 0 6
FN: False Negative 6 0
TPR: (Sensitivity, hit rate, recall) 0.7 NaN
TNR=SPC: (Specificity) NaN 0.7
PPV: Pos Pred Value (Precision) 1 0
NPV: Neg Pred Value 0 1
FPR: False-out NaN 0.3
FDR: False Discovery Rate 0 1
FNR: Miss Rate 0.3 NaN
ACC: Accuracy 0.7 0.7
F1 score 0.8235294 0
MCC: Matthews correlation coefficient NaN NaN
Informedness NaN NaN
Markedness 0 0
Prevalence 1 0
LR+: Positive likelihood ratio NaN NaN
LR-: Negative likelihood ratio NaN NaN
DOR: Diagnostic odds ratio NaN NaN
FOR: False omission rate 1 0
from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

model.fit(train_x, train_y, validation_split=0.1, epochs=50, batch_size=4)

y_pred = model.predict(test_x, batch_size=15)
cm = confusion_matrix(test_y.argmax(axis=1), y_pred.argmax(axis=1))
index = ['neutral', 'happy', 'sad']
columns = ['neutral', 'happy', 'sad']
cm_df = pd.DataFrame(cm, index=index, columns=columns)

plt.figure(figsize=(10, 6))
sns.heatmap(cm_df, annot=True)
Using ConfusionMatrixDisplay, there is a very simple way to do this. It supports display_labels, which can be used to display the labels on the plot:
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
np.random.seed(0)
y_true = np.random.randint(0,3, 100)
y_pred = np.random.randint(0,3, 100)
labels = ['cat', 'dog', 'rat']
cm = confusion_matrix(y_true, y_pred)
ConfusionMatrixDisplay(cm, display_labels=labels).plot()
#plt.savefig("Confusion_Matrix.png")
Output:
To change the x-axis labels to a vertical orientation (needed when the class labels overlap in the plot) and to plot directly from the predictions:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
np.random.seed(0)
n = 10
y_true = np.random.randint(0,n, 100)
y_pred = np.random.randint(0,n, 100)
labels = [f'class_{i+1}' for i in range(n)]
fig, ax = plt.subplots(figsize=(15, 15))
ConfusionMatrixDisplay.from_predictions(
y_true, y_pred, display_labels=labels, xticks_rotation="vertical",
ax=ax, colorbar=False, cmap="plasma")
Given model, validx, validy, and with a lot of help from the other answers, this is what fit my needs: sklearn.metrics.plot_confusion_matrix
import sklearn.metrics
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(26,26))
sklearn.metrics.plot_confusion_matrix(model, validx, validy, ax=ax, cmap=plt.cm.Blues)
ax.set(xlabel='Predicted', ylabel='Actual', title='Confusion Matrix Actual vs Predicted')
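Note that plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2; an equivalent sketch for newer versions (assuming the same model, validx and validy) uses ConfusionMatrixDisplay.from_estimator:
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

fig, ax = plt.subplots(figsize=(26, 26))
ConfusionMatrixDisplay.from_estimator(model, validx, validy, ax=ax, cmap=plt.cm.Blues)
ax.set(xlabel='Predicted', ylabel='Actual', title='Confusion Matrix Actual vs Predicted')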
classifier = svm.SVC(kernel="linear", C=0.01).fit(X_train, y_train)
disp = ConfusionMatrixDisplay.from_estimator(
classifier,
X_test,
y_test,
display_labels=class_names,
cmap=plt.cm.Blues,
normalize=normalize,
)
disp.ax_.set_title(title) # this line is your answer
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report

y_true = np.array([0, 1, 2, 2, 0, 1, 2, 0, 1, 2])
y_pred = np.array([0, 1, 2, 2, 0, 1, 1, 0, 2, 2])

cm = confusion_matrix(y_true, y_pred)
class_names = ['class 0', 'class 1', 'class 2']

display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
display.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()

report = classification_report(y_true, y_pred, target_names=class_names)
print("Classification Report:")
print(report)