# Snippet from the question: tries to group the tuples of `liste` by their
# first element using KMeans, choosing the cluster count by silhouette score.
# NOTE(review): indentation was lost in this paste, so the loop and `if`
# bodies below are not indented; the code is kept exactly as posted. The
# author reports (in the text below) that `clusterer.fit(xxx)` raises an error.
# Each tuple is (number, [list of values]).
liste = [ (1, ["A",1,"C"]) , (3, ["A",1,"C"]) , (1000.256, ["B",1,"C"]) , (1002, ["C",1,"C"]) , (5, ["D",1,"C"]) , (999.3, ["E",1,"C"]) , (2.5, ["F",1,"C"])]
# Object array built from the whole tuples (number AND list), not only the
# numbers at position [0].
xxx=np.array(liste,dtype=object)
# Best candidate found so far; -1 is the initial score to beat.
best_n_clusters,best_silhouette_score=None,-1
# Candidate cluster counts to evaluate.
range_n_clusters=[2,3,4,5,6,7,8,9,10,11]
for n_clusters in range_n_clusters:
clusterer=KMeans(n_clusters=n_clusters)
# NOTE(review): the return value of fit() is used as the labels here; the
# solution further down uses fit_predict() instead -- confirm against the
# scikit-learn documentation.
cluster_labels=clusterer.fit(xxx)
silhouette_avg=silhouette_score(xxx,cluster_labels)
if silhouette_avg>best_silhouette_score:
# NOTE(review): this assigns `best_silhouettescore` (no underscore before
# "score"), so `best_silhouette_score` compared above is never updated.
best_silhouettescore=silhouette_avg
best_n_clusters=n_clusters
# Final clustering with the selected number of clusters.
kmeans=KMeans(n_clusters=best_n_clusters)
cluster_labels=kmeans.fit(xxx)
for r in range(best_n_clusters):
# NOTE(review): `i` is not defined anywhere in this snippet; the loop
# variable is `r`.
group=xxx[cluster_labels==i]
print(f"Groupe {r+1} : {group}")
我有一个由元组组成的初始列表。每个元组的位置 [0] 是一个数字,位置 [1] 是一个值列表。我想把这些元组分到不同的组中,使位置 [0] 中的数字足够接近的元组归入同一组。在此示例中,我希望打印输出 2 个列表:一组包含数字为 1、3、5、2.5 的元组,另一组包含数字为 1000.256、1002、999.3 的元组。
我想用 KMeans 方法对这些点进行分组(我发现这个方法非常有效),并用轮廓系数(silhouette)法确定最佳分组数。我已经用这种方法对带坐标的点进行过分组,但这里只有一个值(位置 [0] 中的数字)可用于分组,因此遇到了一些问题。我遇到的问题是:我写的 "clusterer.fit(xxx)" 会报错:"setting an array element with a sequence"(用序列设置数组元素)。我该如何解决这个问题?
解决方案:
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
# Initial list of tuples: (number used for grouping, payload list).
liste = [
    (1, ["A", 1, "C"]),
    (3, ["A", 1, "C"]),
    (1000.256, ["B", 1, "C"]),
    (1002, ["C", 1, "C"]),
    (5, ["D", 1, "C"]),
    (999.3, ["E", 1, "C"]),
    (2.5, ["F", 1, "C"]),
]

# Cluster only on the number at position [0]. The values are reshaped into a
# single-feature column (one row per tuple) because the payload lists cannot
# be converted to numbers.
numeric_values = np.array([t[0] for t in liste]).reshape(-1, 1)

# Search for the number of clusters with the highest average silhouette score.
best_n_clusters, best_silhouette_score = None, -1
kmeans, cluster_labels = None, None
range_n_clusters = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
for n_clusters in range_n_clusters:
    # The silhouette score is only evaluated for fewer clusters than samples.
    if n_clusters >= len(liste):
        continue
    # Fixed seed and explicit n_init so repeated runs give the same grouping.
    clusterer = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
    labels = clusterer.fit_predict(numeric_values)
    silhouette_avg = silhouette_score(numeric_values, labels)
    if silhouette_avg > best_silhouette_score:
        best_silhouette_score = silhouette_avg
        best_n_clusters = n_clusters
        # Keep the winning model and labels instead of refitting afterwards,
        # so the final grouping is exactly the one that was scored.
        kmeans, cluster_labels = clusterer, labels

# Group the original tuples according to their cluster label.
if best_n_clusters is None:
    # No candidate satisfied 2 <= n_clusters < len(liste) (fewer than three
    # tuples): nothing to split, so keep everything in a single group.
    groups = [list(liste)] if liste else []
else:
    groups = [[] for _ in range(best_n_clusters)]
    for label, t in zip(cluster_labels, liste):
        groups[label].append(t)

# Print the grouped tuples.
for r, group in enumerate(groups):
    print(f"Group {r + 1}: {group}")