我是一名 CE 专业的新生,想编写一个 Python 程序:从文本文件中读取输入,并把输出写入文本文件。我希望程序逐行读取输入,并统计其中各项内容出现的频率。但目前的结果不准确,输出也很混乱,希望您能帮我看看。
这是它给我的输出:
'Kidney Beans': 5
'Onion': 4
: 4
['Milk': 3
'Yogurt']: 3
'Kidney Beans' 'Yogurt']: 3
'Yogurt']: 3
这是我想要的输出:
Kidney Beans: 5
Onion: 4
Eggs: 4
Yogurt: 3
Yogurt, Kidney Beans: 3
Milk: 3
Kidney Beans, Milk: 3
这是我的Python代码:
class TreeNode:
    """A single node of the FP-tree."""

    def __init__(self, name, frequency, parent):
        self.name = name            # item label stored at this node
        self.frequency = frequency  # count accumulated through increment()
        self.parent = parent        # parent node; None marks the root
        self.link = None            # next node in the chain of same-item nodes
        self.children = {}          # item name -> child TreeNode

    def increment(self, frequency=1):
        """Add ``frequency`` (default 1) to this node's count."""
        self.frequency += frequency

    def __repr__(self):
        return f"TreeNode(name={self.name!r}, frequency={self.frequency!r})"
# Update the tree with filtered transactions
def update_tree(items, node, header_table, count=1):
    """Insert ``items`` as a path below ``node``, adding ``count`` to each node.

    Args:
        items: Item names in the order they should appear on the path.
            An empty sequence leaves the tree untouched.
        node: Tree node under which the path starts.
        header_table: Maps item -> ``[support, first_node]``.  Slot 1 is set
            to the first node created for an item; later nodes for the same
            item are appended to that node's link chain.
        count: Amount added to every node on the path (defaults to 1).
    """
    # Iterative walk: avoids re-slicing ``items`` at every level and is not
    # bounded by the interpreter's recursion limit.
    for item in items:
        child = node.children.get(item)
        if child is None:
            child = TreeNode(item, count, node)
            node.children[item] = child
            # Link the new node to nodes having the same item name
            first_node = header_table[item][1]
            if first_node is None:
                header_table[item][1] = child
            else:
                update_header(child, first_node)
        else:
            child.increment(count)
        node = child
# Update the header table to link similar items
def update_header(node_to_test, target_node):
    """Append ``node_to_test`` to the end of the link chain that starts at ``target_node``.

    If ``node_to_test`` is already on the chain nothing is changed; linking it
    again would close the chain into a cycle and make every later walk along
    ``.link`` loop forever.
    """
    tail = target_node
    while tail is not node_to_test and tail.link is not None:
        tail = tail.link
    if tail is not node_to_test:
        tail.link = node_to_test
# Find frequent itemsets
def mine_tree(header_table, min_support, prefix, freq_items):
    """Recursively collect frequent itemsets reachable from ``header_table``.

    Args:
        header_table: Maps item -> ``[support, first_node]``.
        min_support: Threshold handed to create_tree() for conditional trees.
        prefix: Set of items already fixed on this branch of the recursion;
            it is not modified.
        freq_items: Output list; an ``(itemset, support)`` tuple is appended
            for every itemset found.
    """
    # Visit items from highest to lowest support; equal supports are ordered
    # by item name so the traversal is deterministic.
    ordered_items = sorted(
        header_table,
        key=lambda item: (header_table[item][0], item),
        reverse=True,
    )
    for base_pat in ordered_items:
        support, first_node = header_table[base_pat]
        new_freq_set = prefix | {base_pat}
        freq_items.append((new_freq_set, support))
        # Find prefixes
        cond_patt_bases = find_prefix_path(base_pat, first_node)
        if not cond_patt_bases:
            # Nothing above this item in the tree, so there is nothing to extend.
            continue
        # Create conditional tree
        _, head = create_tree(cond_patt_bases, min_support)
        if head is not None:
            mine_tree(head, min_support, new_freq_set, freq_items)
# Ascend tree
def ascend_tree(node, prefix_path):
    """Append the names on the path from ``node`` up to the root to ``prefix_path``.

    ``node`` itself comes first; the node whose ``parent`` is None (the root)
    is not included.  The walk is iterative so that long paths cannot exceed
    the interpreter's recursion limit.
    """
    while node.parent is not None:
        prefix_path.append(node.name)
        node = node.parent
# Find prefix path
def find_prefix_path(base_pat, treeNode):
    """Collect the conditional pattern base for the chain starting at ``treeNode``.

    Args:
        base_pat: Item the chain belongs to (not used by the computation).
        treeNode: First node of the item's link chain, or None.

    Returns:
        A dict mapping ``frozenset(ancestor item names)`` to the summed
        frequency of the chain nodes that have exactly those ancestors.
        Nodes sitting directly below the root contribute nothing.
    """
    cond_pats = {}
    while treeNode is not None:
        prefix_path = []
        ascend_tree(treeNode, prefix_path)
        # prefix_path[0] is the node's own item; only its ancestors matter here.
        ancestors = frozenset(prefix_path[1:])
        if ancestors:
            # Accumulate rather than overwrite: two chain nodes can end up
            # with the same set of ancestors.
            cond_pats[ancestors] = cond_pats.get(ancestors, 0) + treeNode.frequency
        treeNode = treeNode.link
    return cond_pats
# Create the FP-growth tree
def create_tree(transactions, min_support):
    """Build an FP-tree and its header table.

    Args:
        transactions: Either an iterable of transactions (each an iterable of
            items, counted once), or a dict mapping an itemset to the number
            of times it occurs, as returned by find_prefix_path().
        min_support: Minimum support an item needs to be kept in the tree.

    Returns:
        ``(tree_root, header_table)``.  ``header_table`` maps item ->
        ``[support, first_node]``; slot 1 starts as None and is filled in by
        update_tree().  ``(None, None)`` is returned when no item reaches
        ``min_support``.
    """
    # Normalise both input shapes to (items, weight) pairs.  Materialising the
    # list also lets one-shot iterables be traversed twice below.
    if isinstance(transactions, dict):
        weighted = list(transactions.items())
    else:
        weighted = [(transaction, 1) for transaction in transactions]
    # An item counts once per transaction, however often it is repeated in it.
    weighted = [(list(dict.fromkeys(items)), weight) for items, weight in weighted]

    support = {}
    for items, weight in weighted:
        for item in items:
            support[item] = support.get(item, 0) + weight

    # Remove items not meeting minimum support
    header_table = {
        item: [count, None]
        for item, count in support.items()
        if count >= min_support
    }
    if not header_table:
        return None, None

    tree_root = TreeNode('Null Set', 1, None)
    for items, weight in weighted:
        # Descending support, ties broken by item name: every transaction must
        # use the same global order, otherwise the same itemset is spread over
        # different branches and its support is under-counted when mining.
        ordered = sorted(
            (item for item in items if item in header_table),
            key=lambda item: (-header_table[item][0], item),
        )
        if not ordered:
            continue
        # The three-argument call adds exactly 1 to each node on the path ...
        update_tree(ordered, tree_root, header_table)
        # ... so the rest of the weight is added along that same path.
        if weight > 1:
            node = tree_root
            for item in ordered:
                node = node.children[item]
                node.increment(weight - 1)
    return tree_root, header_table
# Load data from file
def load_data(file_path):
    """Read transactions from a comma-separated text file.

    Each non-empty line is one transaction.  Items are separated by commas;
    surrounding whitespace is stripped and empty fields are dropped.

    Args:
        file_path: Path of the file to read.

    Returns:
        A list of transactions, each a list of item strings.
    """
    dataset = []
    with open(file_path, 'r') as file:
        for line in file:
            # Adjust delimiter if necessary
            transaction = [item.strip() for item in line.split(',')]
            transaction = [item for item in transaction if item]
            if transaction:
                dataset.append(transaction)
    return dataset
# Main function to run FP-growth algorithm
def fpgrowth():
    """Run FP-growth on ``InputData.txt`` and write the result to a text file.

    Prompts for the minimum support on standard input, mines the frequent
    itemsets, and writes one ``item, item, ...: support`` line per itemset to
    ``frequent_itemsets.txt``, highest support first.

    Raises:
        ValueError: If the value entered for the minimum support is not an
            integer.
    """
    file_path = "InputData.txt"  # Specify your dataset file name
    transactions = load_data(file_path)
    min_support = int(input("Please enter the minimum support: "))

    # Build the FP-growth tree
    tree, header_table = create_tree(transactions, min_support)

    # Find frequent itemsets
    freq_items = []
    if tree is not None:
        mine_tree(header_table, min_support, set(), freq_items)

    # Write the frequent itemsets to the output file
    output_file_name = "frequent_itemsets.txt"
    ranked = sorted(freq_items, key=lambda entry: entry[1], reverse=True)
    with open(output_file_name, 'w') as f:
        for itemset, support in ranked:
            # Sets have no stable iteration order; sort the items so the file
            # looks the same on every run.
            f.write(f"{', '.join(sorted(itemset))}: {support}\n")
    print(f"Frequent itemsets written to {output_file_name}")
# Run the FP-growth algorithm
# Only run when executed as a script, so importing this module has no side effects.
if __name__ == "__main__":
    fpgrowth()
这是我的数据库:
dataset = [ ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'], ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'], ['Milk', 'Apple', 'Kidney Beans', 'Eggs'], ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'], ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'] ]
我尝试了很多办法,例如使用 ChatGPT,也请教过同事,但得到的结果都一样。
您的 `load_data()` 函数所期望的文件格式,似乎与您实际使用的输入文件的格式不同。
请尝试把输入文件的内容替换为以下格式(每行一条事务,各项之间用逗号分隔,不带方括号和引号):
Milk,Onion,Nutmeg,Kidney Beans,Eggs,Yogurt
Dill,Onion,Nutmeg,Kidney Beans,Eggs,Yogurt
Milk,Apple,Kidney Beans,Eggs
Milk,Unicorn,Corn,Kidney Beans,Yogurt
Corn,Onion,Onion,Kidney Beans,Ice cream,Eggs