The output of this Python code is not what I expected


I am a new CE student and I want to write a Python program that reads its input from a text file and writes its output to a text file. The program should take the input line by line and report the frequency of its contents, but the results are not accurate and the output is garbled. I hope you can help me.

This is the output it gives me:

 'Kidney Beans': 5
 'Onion': 4
: 4
['Milk': 3
 'Yogurt']: 3
 'Kidney Beans'  'Yogurt']: 3
  'Yogurt']: 3

This is the output I want:

Kidney Beans: 5
Onion: 4
Eggs: 4
Yogurt: 3
Yogurt, Kidney Beans: 3
Milk: 3
Kidney Beans, Milk: 3

This is my Python code:


class TreeNode:
    def __init__(self, name, frequency, parent):
        self.name = name
        self.frequency = frequency
        self.parent = parent
        self.link = None
        self.children = {}
    
    def increment(self, frequency):
        self.frequency += frequency

# Update the tree with filtered transactions
def update_tree(items, node, header_table):
    first_item = items[0]
    if first_item in node.children:
        node.children[first_item].increment(1)
    else:
        new_node = TreeNode(first_item, 1, node)
        node.children[first_item] = new_node

        # Link the new node to nodes having the same item name
        if not header_table[first_item][1]:
            header_table[first_item][1] = new_node
        else:
            update_header(new_node, header_table[first_item][1])

    if len(items) > 1:
        update_tree(items[1:], node.children[first_item], header_table)

# Update the header table to link similar items
def update_header(node_to_test, target_node):
    while target_node.link is not None:
        target_node = target_node.link
    target_node.link = node_to_test

# Find frequent itemsets
def mine_tree(header_table, min_support, prefix, freq_items):
    sorted_items = [v[0] for v in sorted(header_table.items(), key=lambda p: (p[1][0], p[0]))]
    for base_pat in sorted_items[::-1]:  # Start from bottom up
        new_freq_set = prefix.copy()
        new_freq_set.add(base_pat)
        freq_items.append((new_freq_set, header_table[base_pat][0]))
        
        # Find prefixes
        cond_patt_bases = find_prefix_path(base_pat, header_table[base_pat][1])
        # Create conditional tree
        cond_tree, head = create_tree(cond_patt_bases, min_support)

        if head is not None:
            mine_tree(head, min_support, new_freq_set, freq_items)

# Ascend tree
def ascend_tree(node, prefix_path):
    if node.parent is not None:
        prefix_path.append(node.name)
        ascend_tree(node.parent, prefix_path)

# Find prefix path
def find_prefix_path(base_pat, treeNode):
    cond_pats = {}
    while treeNode is not None:
        prefix_path = []
        ascend_tree(treeNode, prefix_path)
        if len(prefix_path) > 1:
            cond_pats[frozenset(prefix_path[1:])] = treeNode.frequency
        treeNode = treeNode.link
    return cond_pats

# Create the FP-growth tree
def create_tree(transactions, min_support):
    header_table = {}
    for transaction in transactions:
        for item in transaction:
            header_table[item] = header_table.get(item, 0) + 1

    # Remove items not meeting minimum support
    for k in list(header_table):
        if header_table[k] < min_support:
            del(header_table[k])

    freq_item_set = set(header_table.keys())
    if len(freq_item_set) == 0:
        return None, None

    # Initialize header table
    for k in header_table:
        header_table[k] = [header_table[k], None]

    tree_root = TreeNode('Null Set', 1, None)
    for transaction in transactions:
        transaction_filtered = [item for item in transaction if item in freq_item_set]
        transaction_filtered.sort(key=lambda item: header_table[item][0], reverse=True)
        if transaction_filtered:
            update_tree(transaction_filtered, tree_root, header_table)
    return tree_root, header_table

# Load data from file
def load_data(file_path):
    dataset = []
    with open(file_path, 'r') as file:
        for line in file.readlines():
            transaction = line.strip().split(',')  # Adjust delimiter if necessary
            dataset.append(transaction)
    return dataset

# Main function to run FP-growth algorithm
def fpgrowth():
    file_path = "InputData.txt"  # Specify your dataset file name
    transactions = load_data(file_path)
    min_support = int(input("Please enter the minimum support: "))

    # Build the FP-growth tree
    tree, header_table = create_tree(transactions, min_support)

    # Find frequent itemsets
    freq_items = []
    if tree is not None:
        mine_tree(header_table, min_support, set(), freq_items)

    # Write the frequent itemsets to the output file
    output_file_name = "frequent_itemsets.txt"
    with open(output_file_name, 'w') as f:
        for itemset, support in sorted(freq_items, key=lambda i: i[1], reverse=True):
            f.write(f"{' '.join(itemset)}: {support}\n")
    print(f"Frequent itemsets written to {output_file_name}")

# Run the FP-growth algorithm
fpgrowth()

This is my dataset:

dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']
]

I have tried many things, for example ChatGPT, and I asked my classmates, but the result is always the same.

1 Answer

Your load_data() function expects a different file format from the one you apparently have: it splits every line on commas, but the stray quote and bracket characters in your output (for example 'Onion' and ['Milk') suggest that InputData.txt currently contains the Python list literal shown under "This is my dataset" rather than plain comma-separated values, so those characters end up inside the item names.

Try replacing the input file with the following:

Milk,Onion,Nutmeg,Kidney Beans,Eggs,Yogurt
Dill,Onion,Nutmeg,Kidney Beans,Eggs,Yogurt
Milk,Apple,Kidney Beans,Eggs
Milk,Unicorn,Corn,Kidney Beans,Yogurt
Corn,Onion,Onion,Kidney Beans,Ice cream,Eggs
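
If you would rather keep InputData.txt in the Python-literal form shown under "This is my dataset", you could instead replace load_data() with a parser for that format. This is only a minimal sketch, assuming the file contains exactly the dataset = [...] assignment and nothing else (the name load_data comes from the question; the rest is an assumption):

import ast

# Parse a file that stores the transactions as a Python list literal,
# e.g. a single "dataset = [ [...], [...] ]" assignment.
def load_data(file_path):
    with open(file_path, 'r') as file:
        text = file.read()
    # Keep only the part after the first '=' (if any) and evaluate the
    # remaining list literal safely with ast.literal_eval.
    literal = text.split('=', 1)[-1]
    return ast.literal_eval(literal.strip())

Either way, note that fpgrowth() joins the items of each frequent itemset with a space (' '.join(itemset)), while your desired output separates them with a comma and a space; changing the join string to ', ' should bring the formatting closer to what you expect.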