我一直在尝试学习多处理。我想通过编写一个生成随机数列表的小程序来测试我的理解。然后使用两种不同的方法来查找列表中的最大数字。第一种方法不使用多处理,第二种方法使用多处理。然而,当我运行代码时,无论列表的长度如何,当我使用多处理代码时,运行时间总是慢数百倍。
import random
import multiprocessing
import time
def random_num_list(length=100000):
    """Return a list of `length` random integers drawn uniformly from [-13443, 435234]."""
    return [random.randint(-13443, 435234) for _ in range(length)]
rando_list = random_num_list()
def method1(lst):
    """Return the largest element of lst (single-process baseline).

    Bug fixed: the original seeded its scan with the hard-coded lower
    bound -13443, so it silently returned -13443 whenever every element
    was smaller than that. The built-in max() has no such floor; the
    `default` keeps the original empty-list result (-13443) intact.
    """
    return max(lst, default=-13443)
lst1, lst2 = rando_list[:int(len(rando_list)/3)], rando_list[int(len(rando_list)/3):]
def partial_process(lst1, lst2):
    """Find the largest value in lst1 and append it to lst2.

    lst2 is mutated in place; in method2 it is a Manager proxy list, so
    the append is an IPC round-trip to the manager process.

    Bug fixed: the original seeded the scan with -13443, returning a
    wrong maximum whenever every element of lst1 was below that floor.
    The `default` preserves the original behavior for an empty lst1.
    """
    lst2.append(max(lst1, default=-13443))
def full_process(lst2):
    """Print the largest value in lst2 and also return it.

    Returning the value is a backward-compatible addition (the original
    returned None, which callers ignored) and makes the function testable.

    Bug fixed: the original seeded the scan with -13443, printing a wrong
    maximum whenever every element was below that floor. The `default`
    preserves the original empty-list output (-13443).
    """
    biggest_num = max(lst2, default=-13443)
    print(biggest_num)
    return biggest_num
def method2(lst1,lst2):
    # Why this is hundreds of times slower than method1:
    #  1. manager.list() wraps lst2 in a proxy object; operations on the
    #     proxy go through the separate manager process (presumably one
    #     IPC round-trip per operation — confirm against the
    #     multiprocessing.Manager docs), which dwarfs the cost of the
    #     comparisons themselves.
    #  2. p1.join() completes before p2.start() is ever called, so the two
    #     processes never run concurrently — this is strictly serial work
    #     plus the fixed cost of spawning two interpreters.
    # NOTE(review): the ordering is load-bearing: full_process reads the
    # value that partial_process appends, so the processes cannot simply
    # be started together without introducing a race.
    with multiprocessing.Manager() as manager:
        lst2 = manager.list(lst2)
        p1 = multiprocessing.Process(target=partial_process,args=(lst1, lst2))
        p2 = multiprocessing.Process(target=full_process,args=(lst2,))
        p1.start()
        p1.join()
        p2.start()
        p2.join()
if __name__ == '__main__':
    # Entry-point guard is mandatory for multiprocessing on spawn-based
    # platforms; without it, each worker would re-run this block on import.
    method2(lst1,lst2)
    # Single-process baseline over the full list, for comparison.
    print(method1(rando_list))
我也尝试过这样写方法2:
def method2(lst):
    """Return the maximum of lst using one worker process per CPU core.

    Requires a module-level `find_max(chunk)` helper (it must be defined
    at top level so it can be pickled for the pool workers).

    Bug fixed: the original built exactly `num_processes` chunks of size
    `len(lst) // num_processes`, silently dropping the trailing
    `len(lst) % num_processes` elements — the true maximum could be
    missed entirely.
    """
    num_processes = multiprocessing.cpu_count()
    # Ceiling division so every element lands in some chunk.
    chunk_size = -(-len(lst) // num_processes)
    chunks = [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]
    with multiprocessing.Pool(processes=num_processes) as pool:
        results = pool.map(find_max, chunks)
    return max(results)
为此,您应该使用 multiprocessing.Pool 的 map() 方法。
下面我们通过一个完整的示例来回顾一下:
from random import randint
from multiprocessing import Pool
import time
N = 100_000
def getmax(_list):
    """Return the largest element of _list via an explicit linear scan.

    Deliberately avoids the built-in max() so each worker burns some real
    CPU — the point of the demo is to give the processes measurable work.
    Returns float("-inf") (a float) for an empty input, an int otherwise.
    """
    best = float("-inf")
    for value in _list:
        best = value if value > best else best
    return best
def main():
    """Generate N random ints, split them in half, and find the overall
    maximum with a worker pool — one half per subprocess."""
    numbers = [randint(-13443, 435234) for _ in range(N)]
    # Sanity check: the two halves must cover the list exactly.
    assert len(numbers) % 2 == 0
    mid = len(numbers) // 2
    halves = (numbers[:mid], numbers[mid:])
    with Pool() as pool:
        # Each worker scans its half and returns that half's maximum.
        partial_maxima = pool.map(getmax, halves)
    # Reduce the two partial results to the global maximum.
    overall = max(partial_maxima)
    print(overall)
    assert overall == max(numbers)
if __name__ == "__main__":
    # Entry-point guard is required on spawn-based platforms so pool
    # workers importing this module don't re-run main().
    start = time.time()
    main()
    end = time.time()
    # Wall-clock duration, including process startup and IPC.
    print(f"Duration = {end-start:.2f}s")
输出:
Duration = 0.11s