我的大学项目有问题。我们必须做一些与二叉分割、二叉搜索树和搜索相关的任务。我们必须测量整个过程的时间,我们被迫使用大量数据来使测量时间具有可比性。这是我们制作的代码:
import copy
import random
import time
import csv
import sys
import threading
sys.setrecursionlimit(2**31-31)
threading.stack_size(2**27)
def generateArrayNonRepeating(n):
return random.sample(range(0, n), n)
def QSmid(table):
if len(table) <= 1:
return table
pivot_index = len(table) // 2
pivot = table[pivot_index]
left = []
right = []
for i in range(len(table)):
if i != pivot_index:
if table[i] < pivot:
left.append(table[i])
else:
right.append(table[i])
return QSmid(left) + [pivot] + QSmid(right)
def search(table, value):
for i in range(len(table)):
if table[i] == value:
return i
def binarySearch(table, value):
left = 0
right = len(table) - 1
while left <= right:
mid = (left + right) // 2
if table[mid] == value:
return mid
elif table[mid] < value:
left = mid + 1
else:
right = mid - 1
def binarySplit(table):
if len(table) <= 1:
return table
mid_pivot = len(table) // 2
mid = []
mid.append(table[mid_pivot])
left = table[:mid_pivot]
right = table[mid_pivot+1:]
return mid + binarySplit(left) + binarySplit(right)
class Node():
def __init__(self, data=None):
self.data = data
self.left_child = None
self.right_child = None
class BST():
def __init__(self):
self.root = Node(None)
def add(self, data):
if self.root.data is None:
self.root.data = data
else:
self.add_to_top(data, self.root)
def add_to_top(self, data, top):
if data != top.data:
if data < top.data:
if top.left_child is None:
top.left_child = Node(data)
return top.left_child
else:
return self.add_to_top(data, top.left_child)
else:
if top.right_child is None:
top.right_child = Node(data)
return top.right_child
else:
return self.add_to_top(data, top.right_child)
else:
return top
def height(self):
return self._height(self.root)
def _height(self, node):
if node is None:
return 0
else:
left_height = self._height(node.left_child)
right_height = self._height(node.right_child)
return max(left_height, right_height) + 1
def search(self, data):
return self._search(data, self.root)
def _search(self, data, node):
if node is None or node.data == data:
return node
elif data < node.data:
return self._search(data, node.left_child)
else:
return self._search(data, node.right_child)
with open('resultsCreate.csv', 'w', newline='') as file:
writer = csv.writer(file)
writer.writerow(["n", "CB", "CTA", "CTB"])
with open('resultsSearch.csv', 'w', newline='') as file:
writer = csv.writer(file)
writer.writerow(["n", "SA", "SB", "STA", "STB"])
with open('resultsHeight.csv', 'w', newline='') as file:
writer = csv.writer(file)
writer.writerow(["n", "HTA", "HTB"])
for j in range(5):
for i in range(10):
n = 10000 + i * 1000
A = generateArrayNonRepeating(n)
start = time.time()
B = copy.deepcopy(A)
QSmid(B)
end = time.time()
CB = end - start
print("kopiowanie i sortowanie")
start = time.time()
for i in B:
search(A, i)
end = time.time()
SA = end - start
print("sa")
start = time.time()
for i in A:
binarySearch(B, i)
end = time.time()
SB = end - start
print("sb")
start = time.time()
TA = BST()
TA.add(A[0])
for i in range(1, len(A)):
TA.add(i)
end = time.time()
CTA = end - start
HTA = TA.height()
print("git")
start = time.time()
for i in A:
TA.search(i)
end = time.time()
STA = end - start
print("drzewo")
start = time.time()
TB = BST()
TB.add(A[0])
for i in range(1, len(A)):
TB.add(i)
end = time.time()
CTB = end - start
HTB = TB.height()
print("wysokosc drzewa")
start = time.time()
for i in A:
TB.search(i)
end = time.time()
STB = end - start
print("search in tree")
with open('resultsCreate.csv', 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow([n, CB, CTA, CTB])
with open('resultsSearch.csv', 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow([n, SA, SB, STA, STB])
with open('resultsHeight.csv', 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow([n, HTA, HTB])
with open('resultsCreate.csv', 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow([" ", " ", " ", " "])
with open('resultsSearch.csv', 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow([" ", " ", " ", " ", " "])
with open('resultsHeight.csv', 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow([" ", " ", " "])
del A
del B
del TA
del TB
我们面临的问题是“进程完成,退出代码为 -1073741571 (0xC00000FD)”,因此深度递归/堆栈溢出
我们已经尝试添加 thread.lock 来避免这种情况,但问题仍然存在,只是在稍长一段时间后突然出现。我已将 pycharm 中的最大堆大小更改为 99999999 MiB,但它仍然不会使用我所有的 ram。老实说,当程序在我的 ram 消耗中运行时,我什至无法识别任何变化。 我如何强制 pycharm/python 使用我的更多资源来完成这项工作?当我使用小于 ~3500 的数据时,它可以工作,但我们无法发现任何恒定的时间差异。