import re
import concurrent.futures
import time
setting1 = 101
setting2 = 321
setting3 = None
threads = 1000
start = time.perf_counter()
def load_keywords(keyword_file):
with open(keyword_file, 'r') as keywordhandle:
keywords = [keyword.strip() for keyword in keywordhandle.readlines()]
return keywords
def gencombo(user, keywords):
og_user = re.sub(r'\d', '', user)
user_lower = og_user.lower()
combos = []
n_keywords = []
for keyword in keywords:
if keyword.lower() in user_lower:
n_keywords.append(keyword)
for k1 in n_keywords:
k1 = str(k1)
for k2 in n_keywords:
k2 = str(k2)
if k2 in k1 and k2 is not k1:
n_keywords.remove(k2)
n_keywords.reverse()
for k1 in n_keywords:
k1 = str(k1)
for k2 in n_keywords:
k2 = str(k2)
if k2 in k1 and k2 is not k1:
n_keywords.remove(k2)
for keyword in n_keywords:
if keyword.lower() in user_lower:
print(f"Combos added for: {user}")
if setting1 is not None:
combos.append(f"{user}:{keyword}{str(setting1)}")
if setting2 is not None:
combos.append(f"{user}:{keyword}{str(setting2)}")
if setting3 is not None:
combos.append(f"{user}:{keyword}{str(setting3)}")
return '\n'.join(combos) if combos else None
def gencombos(input_file, output_file, keyword_file):
keywords = load_keywords(keyword_file)
with open(input_file, 'r') as combohandle:
with open(output_file, 'w') as handlenone:
def threadrun():
for user in combohandle:
combo = gencombo(user.strip(), keywords)
if combo:
handlenone.write(f"{combo}\n")
with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
executor.submit(threadrun)
gencombos("users.txt", "output.txt", "keywords.txt")
finish = time.perf_counter()
input(f"Finished in {round(finish - start, 2)} seconds.")
我首先尝试单独使用线程,但后来我发现并发.futures 有助于运行某些函数。因此,我转向了这一点。然而,即使在切换之后,我发现配置的线程数并不重要。它仍然大约在同一时间运行。我希望你们中的一位能帮助我,因为我不明白。
我建议您在尝试学习该语言时采用更复杂的路线,但是 Python 中的线程,尤其是对于如此简单的东西,几乎永远不会看到性能的飞跃。事实上,有时当您使用正则表达式和循环列表等受 CPU 限制的任务时,由于解释器的工作方式,线程实际上会减慢程序速度。
1000个线程开销也不少了
从纯粹的效率角度来看,您可能会发现在这样的脚本中使用多处理而不是多线程更成功。我不一定想以“更好”的方式为您重写此代码,但我建议您阅读和研究有关 Python 环境中的多处理的内容。