我编写了一个 python 脚本来使用聚类(K-means)获得 TSP 的解决方案。原理是创建集群并获取每个集群之间的最佳路径(使用它们的质心)。然后,对于每个簇: - 如果内部城市数量低于阈值 -> 获取城市之间的最优路径 -else -> 获取当前集群内的子集群并重复步骤...最后,它返回必须是最佳路径的城市列表。
阈值(集群内生成子集群的最小城市数量)始终大于生成的子集群数量,这意味着每个集群必须有超过 1 个城市。 此外,当它生成随机城市时,它应该删除重复的城市并添加新的城市,直到所有城市都不同。
尽管采取了这 2 项预防措施,我还是遇到了此错误:ConvergenceWarning:发现的不同簇数 (5) 小于 n_clusters (6)。可能是由于 X 中的重复点。
我不明白为什么。有人知道我为什么会出现此错误以及如何解决它吗? 我不想消除这个错误,我想修复它。
TRESHOLD 和 NB_CLUSTERS 在导入后立即修复。 这是生成随机城市并删除重复项(或应该这样做)的部分:
# list of cities
cities_2 = []
for i in range(10000):
cities_2.append(City([random.randint(0,100), random.randint(0,100)]))
#Check length --> if it's not 1000, add more cities
while len(list(dict.fromkeys(cities_2))) != len(cities_2):
# Remove duplicates
cities_2 = list(dict.fromkeys(cities_2))
for i in range(1000 - len(cities_2)):
cities_2.append(City([random.randint(0,100), random.randint(0,100)]))
完整代码如下:
#Solve a traveling salesman problem using K-means clustering recursively
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import itertools, math, random, sys, openpyxl, time
start_time = time.time()
NB_CLUSTERS = 6
TRESHOLD = 8
# Class to represent a city
class City:
def __init__(self, coordinates, name= None):
# Coordinates of the city
self.coordinates = coordinates
if name==None:
self.name = f"City_{self.coordinates[0]}{self.coordinates[1]}"
else:
self.name = name
def showTime(self):
print(f"City name: {self.name}, coordinates: ({self.coordinates[0]}, {self.coordinates[1]})")
# Class to represent a cluster (geographic area)
class Area(City):
def __init__(self, coordinates, cities = None, nbClusters = NB_CLUSTERS, isEarth = False):
# Coordinates of the area (mean of all cities in the area)
super().__init__(coordinates)
# List of cities in the area
if cities == None:
self.citiesList = []
else:
self.citiesList = cities
# List of (sub)areas in the area
self.areasList = []
# number of sub-areas in the area
self.nbMAreas = nbClusters
# Is earth .
self.isEarth = isEarth
if self.isEarth:
# next cluster
self.nextCluster = self.citiesList[0]
self.nextClusterCoordinates = self.citiesList[0].coordinates
# previous cluster
self.previousCluster = self.citiesList[0]
self.previousClusterCoordinates = self.citiesList[0].coordinates
else:
# Set during the execution
# next cluster
self.nextCluster = None
self.nextClusterCoordinates = [0, 0]
# previous cluster
self.previousCluster = None
self.previousClusterCoordinates = [0, 0]
# Frist sub-area
self.firstSubCluster = None
# Last sub-area
self.lastSubCluster = None
def addCities(self, cities):
self.citiesList.extend(cities)
def setNextClusters(self, nextC):
self.nextCluster = nextC
self.nextClusterCoordinates = nextC.coordinates
def setPreviousClusters(self, previousC):
self.previousCluster = previousC
self.previousClusterCoordinates = previousC.coordinates
# From the cities in the area, get clusters (sub-areas)
# Add cities to their sub-areas
# Get the first and last clusters
# Sort clusters
# Output: self.areasList = list of sub-areas (sorted)
def getClusters(self):
data = []
# Check if there are enough cities to cluster
#if len(self.citiesList) >= TRESHOLD:
# Add coordinates of all cities in the area to the data
for city in self.citiesList:
data.append([city.coordinates[0], city.coordinates[1]])
# Cluster the data
self.kmeans = KMeans(n_clusters=self.nbMAreas)
self.kmeans.fit(data)
# Add each cluster to areasList
for center in self.kmeans.cluster_centers_:
self.areasList.append(Area( coordinates = center, nbClusters=self.nbMAreas))
# Add cities to each cluster
for i, label in enumerate(self.kmeans.labels_):
self.areasList[label].addCities([self.citiesList[i]])
# Remove empty clusters
self.areasList = list(filter(lambda x: len(x.citiesList) > 0, self.areasList))
# Sort clusters to get the minimum distance between clusters
self.getPerfectPath()
# get first and last sub-cluster
self.getFirstAndLastSubCluster()
# set sub-clusters neighbours
for i in range(len(self.areasList)):
nextClustIndex = i+1
if nextClustIndex == len(self.areasList):
self.areasList[i].setNextClusters(self.nextCluster)
else:
self.areasList[i].setNextClusters(self.areasList[nextClustIndex])
previousClustIndex = i-1
if previousClustIndex == -1:
self.areasList[i].setPreviousClusters(self.previousCluster)
else:
self.areasList[i].setPreviousClusters(self.areasList[previousClustIndex])
# Get the perfect path between points (cities or clusters)
# First sub-cluster is firstSubCluster
# Last sub-cluster is lastSubCluster
# Output: self.areasList = list of sub-areas (sorted)
# Use permutations
def getPerfectPath(self, cluster = True):
# Get the list of sub-areas without the first and last sub-areas
if cluster: localList = self.areasList
else: localList = self.citiesList
# Remove the first and last sub-areas
minDistance = 999999999999999
# Get previous point = lastClusterCoordinate from previous cluster
if self.isEarth == True:
previousPoint = self.previousCluster
else:
try:
previousPoint = self.previousCluster.lastSubCluster
except:
previousPoint = self.previousCluster
# Get next point = next cluster coordinate
nextPoint = self.nextCluster
# Get for all pemrutations
for permutation in itertools.permutations(localList):
permutation = list(permutation)
# Calculate the total ditance
permutation.insert(0, previousPoint)
permutation.append(nextPoint)
distance = self.distanceFullPath(permutation)
if distance < minDistance:
minDistance = distance
if cluster:
self.areasList = permutation[1:-1]
else:
self.citiesList = permutation[1:-1]
@staticmethod
def distanceFullPath(myClusters):
distance = 0
for i in range(len(myClusters) - 1):
distance += math.dist(myClusters[i].coordinates, myClusters[i+1].coordinates)
return distance
# Function returning the optimal path (cities) --> function call by user
def getOptimalPath(self):
listOfCities = []
# Check if there are enough cities in cluster
if len(self.citiesList) >= TRESHOLD:
# Get sub-clusters
self.getClusters()
# for each sub-cluster, get the optimal path
for area in self.areasList:
listOfCities.extend(area.getOptimalPath())
if self.isEarth:
# Add the first city at the end (to close the path)
listOfCities.append(listOfCities[0])
return listOfCities
# If not enough cities, return the perfect path between cities in the area
else:
self.perfectCitiesPath()
return self.citiesList
def perfectCitiesPath(self):
# Check if there are not too many cities in cluster
if len(self.citiesList) < TRESHOLD:
# Sort clusters to get the minimum distance between clusters
self.getPerfectPath(False)
# Get first and last cities
self.getFirstAndLastSubCluster(False)
else:
print("ERROR: Too many cities")
def getFirstAndLastSubCluster(self, clusters=True):
if clusters:
self.firstSubCluster = self.areasList[0]
self.lastSubCluster = self.areasList[-1]
else:
self.firstSubCluster = self.citiesList[0]
self.lastSubCluster = self.citiesList[-1]
def showTime(self):
print(f"Cluster name: {self.name}, coordinates: ({self.coordinates[0]}, {self.coordinates[1]})")
# Print the list of cities in the cluster
for city in self.citiesList:
city.showTime()
# Display the area
def displayArea(self, solution = False, showInfo = True):
x = []
y = []
for city in self.citiesList:
x.append(city.coordinates[0])
y.append(city.coordinates[1])
# True if clusters have been solved
if solution:
try:
# Plot cities with clusters colors
plt.scatter(x, y, c=self.kmeans.labels_)
except:
print("Please solve the problem first")
# False if clusters have not been solved
else:
# Plot cities without clusters colors
plt.scatter(x, y)
if showInfo:
# add annotations for each point
for city in self.citiesList:
plt.annotate(f'{city.name}({city.coordinates[0]},{city.coordinates[1]})', xy=(city.coordinates[0], city.coordinates[1]), textcoords='offset points', xytext=(0, 10), ha='center')
plt.show()
def displayConnectedCities(citiesList, plot = True):
# Check if the list is not empty
if not citiesList:
print("The cities list is empty.")
return
# Extract coordinates and names for plotting and annotation
x_coords = [city.coordinates[0] for city in citiesList]
y_coords = [city.coordinates[1] for city in citiesList]
names = [city.name for city in citiesList]
# Plot cities
#plt.scatter(x_coords, y_coords)
# Connect cities
plt.plot(x_coords, y_coords, marker='') # Connects in order
"""
# Annotate each city
for i, name in enumerate(names):
plt.annotate(name, (x_coords[i], y_coords[i]))
"""
# Display the graph
if plot:
plt.show()
def importData(filePath):
data = []
with open(filePath) as fp:
line = fp.readline()
while line:
data.append(line)
line = fp.readline()
# Delete 7 first first lines
data = data[7:-1]
# from "A BBBB.BBB CCCC.CCC" to [BBBB.BBB, CCCC.CCC]
for line in range(len(data)):
strline = data[line].split(" ")
data[line] = [float(strline[2][:-2]), float(strline[1])]
return data
def main():
# averageDistance = []
# # Make iterations
# for i in range(10):
# earth = Area([0,0], cities = cities_2, isEarth = True)
# optimal_path = earth.getOptimalPath()
# optimal_path.append(optimal_path[0])
# print("-----------------------")
# print("Nb of clusters: X") #, earth.nbMAreas)
# print("Original path distance:", Area.distanceFullPath(cities_2))
# print("Optimal path distance:", Area.distanceFullPath(optimal_path))
# print("-----------------------")
# displayConnectedCities(optimal_path)
# averageDistance.append(Area.distanceFullPath(optimal_path))
# averageDistance = sum(averageDistance) / len(averageDistance)
# print("Average distance:", averageDistance)
# plt.show()
global TRESHOLD
# Clear logs.txt
logfile = open('mylogs.txt', 'w')
#logfile.write('')
#logfile.save()
# close it
logfile.close()
# Redirect sys.stdout to the file
stdout_fileno = sys.stdout
# Parameters variation
# NB of clusters: from 7 to 10
# Threshold: from NB of clusters to 12
# NB of cities : from 10 to 1000 (100 by 100)
it = 0
# list of cities keeping same cities through all iterations
all_cities = []
for i in range(1000):
all_cities.append(City([random.randint(0,100), random.randint(0,100)]))
#Check length --> if it's not 1000, add more cities
while len(list(dict.fromkeys(all_cities))) != len(all_cities):
# Remove duplicates
all_cities = list(dict.fromkeys(all_cities))
for i in range(1000 - len(all_cities)):
all_cities.append(City([random.randint(0,100), random.randint(0,100)]))
wb = openpyxl.load_workbook('results.xlsx')
sheet = wb['Feuil1']
sheet.delete_rows(2, 100000)
print("Start")
for nbClusters in range(6,8):
for threshold in range(nbClusters+1, 9):
TRESHOLD = threshold
# Select the nbCities first cities
my_cities = all_cities
earth = Area([0,0], cities = my_cities, isEarth = True, nbClusters = nbClusters)
optimal_path = earth.getOptimalPath()
sys.stdout = open('myLogs.txt', 'a')
print("-----------------------")
print("Nb of clusters:", nbClusters)
print("Threshold:", threshold)
print("Nb of cities:", len(my_cities))
print("Original path distance:", Area.distanceFullPath(my_cities))
print("Optimal path distance:", Area.distanceFullPath(optimal_path))
print("-----------------------")
sys.stdout = stdout_fileno
it += 1
print("iteration:", it)
# Write in excel sheet the results
sheet.append([nbClusters, threshold, len(my_cities), Area.distanceFullPath(my_cities), Area.distanceFullPath(optimal_path), Area.distanceFullPath(optimal_path) / Area.distanceFullPath(my_cities)])
wb.save('results.xlsx')
#displayConnectedCities(optimal_path)
def main_base():
# list of cities (FIXED)
Paris = City([0,5], name = "Paris")
London = City([10,2], name = "London")
Berlin = City([8,10], name = "Berlin")
Milan = City([7,10], name = "Milan")
Rio = City([3,5], name = "Rio")
Sydney = City([5,4], name = "Sydney")
Moscow = City([12,5], name = "Moscow")
Seoul = City([1,6], name = "Seoul")
Tokyo = City([1,5], name = "Tokyo")
NewYork = City([5,8], name = "NewYork")
cities_1 = [Paris, London, Berlin, Milan, Rio, Sydney, Moscow, Seoul, Tokyo, NewYork]
# list of cities
cities_2 = []
for i in range(10000):
cities_2.append(City([random.randint(0,100), random.randint(0,100)]))
#Check length --> if it's not 1000, add more cities
while len(list(dict.fromkeys(cities_2))) != len(cities_2):
# Remove duplicates
cities_2 = list(dict.fromkeys(cities_2))
for i in range(1000 - len(cities_2)):
cities_2.append(City([random.randint(0,100), random.randint(0,100)]))
cities_3 = []
data = importData("ch71009.txt")
for i in range(len(data)):
cities_3.append(City(data[i]))
earth = Area([0,0], cities = cities_2, isEarth = True)
optimal_path = earth.getOptimalPath()
print("distance:", Area.distanceFullPath(optimal_path))
print("--- %s seconds ---" % (time.time() - start_time))
displayConnectedCities(optimal_path)
def main_several():
# list of cities
cities_2 = []
for i in range(1000):
cities_2.append(City([random.randint(0,100), random.randint(0,100)]))
#Check length --> if it's not 1000, add more cities
while len(list(dict.fromkeys(cities_2))) != len(cities_2):
# Remove duplicates
cities_2 = list(dict.fromkeys(cities_2))
for i in range(1000 - len(cities_2)):
cities_2.append(City([random.randint(0,100), random.randint(0,100)]))
ITERATION = 100
for i in range(ITERATION):
earth = Area([0,0], cities = cities_2, isEarth = True)
optimal_path = earth.getOptimalPath()
displayConnectedCities(optimal_path, plot=False)
print("Iteration", i)
print("Optimal path distance:", Area.distanceFullPath(optimal_path))
plt.show()
def getOptiPaths(cities, nbClusters = 6, treshold = 8, nbIterations = 10):
pathList = []
global TRESHOLD
global NB_CLUSTERS
TRESHOLD = treshold
NB_CLUSTERS = nbClusters
print("Start")
for i in range(nbIterations):
print("Iteration", i)
earth = Area([0,0], cities = cities, isEarth = True)
optimal_path = earth.getOptimalPath()
pathList.extend([optimal_path])
print("End")
return pathList
if __name__ == "__main__":
# Number of processes to create
main_base()
我对这段代码还有另一个问题,我认为这更多是一个算法问题,如果你感兴趣 -> 在 python 中递归使用 K-means 进行 TSP 优化
我通过实施一种生成城市的新方法修复了该错误。
def generateCities(numOfCities):
# list of cities
cities = []
# Number of cities
numOfCities = numOfCities
# Minimum dimension of the map (minDim x minDim)
minDim = int(math.sqrt(numOfCities))+1
# List storing coordinates
coorList = []
# Randomly generate cities
for i in range(numOfCities):
# Generate random coordinates
randCoor = [random.uniform(0,minDim), random.uniform(0,minDim)]
# Check if the coordinates are not already in the list
while randCoor in coorList:
# If the coordinates are already in the list, generate a new one
randCoor = [random.uniform(0,minDim), random.uniform(0,minDim)]
# Add the coordinates to the list
coorList.append(randCoor)
# Add the city to the list
cities.append(City(randCoor))
# Return the list of cities
return cities