我正在开发一个复杂的规则解析器,它具有以下属性:
我能够执行简单的规则,但无法评估嵌套括号中的复杂规则。
这是我试图评估的嵌套规则定义:
definition = '((K00925 K00625),K01895) (K00193+K00197+K00194) (K00577+K00578+K00579+K00580+K00581-K00582-K00583+K00584) (K00399+K00401+K00402) (K22480+K22481+K22482,K03388+K03389+K03390,K08264+K08265,K03388+K03389+K03390+K14127+(K14126+K14128,K22516+K00125))'
((K00925 K00625),K01895)
(K00193+K00197+K00194)
(K00577+K00578+K00579+K00580+K00581-K00582-K00583+K00584)
(K00399+K00401+K00402)
(K22480+K22481+K22482,K03388+K03389+K03390,K08264+K08265,K03388+K03389+K03390+K14127+(K14126+K14128,K22516+K00125))
这是我几乎正确的代码:
import re
def rule_splitter(rule: str, split_characters: list) -> set:
"""
Split rule by characters.
Args:
rule (str): Boolean logical string.
split_characters (list): List of characters to split in rule.
Returns:
set: Unique symbols in a rule.
"""
rule_decomposed = str(rule)
if split_characters:
for character in split_characters:
character = character.strip()
rule_decomposed = rule_decomposed.replace(character, "")
unique_symbols = set(rule_decomposed.split())
return unique_symbols
def evaluate_rule(rule: str, items: set, replace={"+": " & ", ",": " | "}) -> bool:
"""
Evaluate a string of boolean logicals.
Args:
rule (str): Boolean logical string.
items (set): List of items in rule.
replace (dict, optional): Replace boolean characters. Defaults to {"+":" & ", ",":" | "}.
Returns:
bool: Evaluated rule.
"""
# Handle optional items prefixed by '-'
rule = re.sub(r'-\w+', '', rule)
# Replace characters for standard logical formatting
if replace:
for character_before, character_after in replace.items():
rule = rule.replace(character_before, character_after)
# Split the rule into individual symbols
unique_symbols = rule_splitter(rule, replace.values())
# Create a dictionary with the presence of each symbol in the items
item_to_bool = {sym: (sym in items) for sym in unique_symbols}
# Remove redundant logical operators and fix syntax
rule = re.sub(r'(&\s*){2,}', ' & ', rule) # Remove consecutive '&'
rule = rule.strip() # Remove leading/trailing whitespace
# Parse and evaluate the rule
expr = eval(rule, {"__builtins__": None}, item_to_bool)
return expr
def handle_optional_items(rule: str) -> str:
"""
Handle optional items in a rule, indicated by a '-' prefix.
Args:
rule (str): Rule string with optional items.
Returns:
str: Modified rule string with optional items removed.
"""
# Remove optional items prefixed by '-'
rule = re.sub(r'-\w+', '', rule)
return rule
def find_rules(definition: str) -> list:
"""
Find and extract rules from the definition string.
Args:
definition (str): Complex boolean logical string with multiple rules.
Returns:
list: List of extracted rules as strings.
"""
rules = []
stack = []
current_rule = ""
for char in definition:
if char == '(':
if stack:
current_rule += char
stack.append(char)
elif char == ')':
stack.pop()
if stack:
current_rule += char
else:
rules.append(current_rule.strip())
current_rule = ""
else:
if stack:
current_rule += char
return rules
def evaluate_definition(definition: str, items: set) -> dict:
"""
Evaluate a complex definition string involving multiple rules.
Args:
definition (str): Complex boolean logical string with multiple rules.
items (set): Set of items to check against the rules.
Returns:
dict: Dictionary with each rule and its evaluated result.
"""
# Extract individual rules from the definition
rules = re.findall(r'\([^\)]+\)', definition)
# Evaluate each rule
rule_results = {}
for rule in rules:
cleaned_rule = rule[1:-1] # Remove the outer parentheses
try:
result = evaluate_rule(cleaned_rule, items)
except SyntaxError as e:
# Handle syntax errors from eval() due to incorrect formatting
result = False
rule_results[rule] = result
return rule_results
# Define the rules
definition = '((K00925 K00625),K01895) (K00193+K00197+K00194) (K00577+K00578+K00579+K00580+K00581-K00582-K00583+K00584) (K00399+K00401+K00402) (K22480+K22481+K22482,K03388+K03389+K03390,K08264+K08265,K03388+K03389+K03390+K14127+(K14126+K14128,K22516+K00125))'
# List of items to check against
items = {
'K00925',
# 'K00625',
# 'K01895',
'K00193',
'K00197',
'K00194',
'K00577',
'K00578',
'K00579',
'K00580',
'K00581',
'K00582',
'K00584',
'K00399',
'K00401',
'K00402',
'K22480',
'K22481',
'K22482',
'K03388',
'K03389',
'K03390',
'K08264',
'K08265',
'K14127',
'K14126',
# 'K14128',
'K22516',
# 'K00125'
}
# Evaluate the definition
result = evaluate_definition(definition, items)
result
{'((K00925 K00625)': False,
'(K00193+K00197+K00194)': True,
'(K00577+K00578+K00579+K00580+K00581-K00582-K00583+K00584)': True,
'(K00399+K00401+K00402)': True,
'(K22480+K22481+K22482,K03388+K03389+K03390,K08264+K08265,K03388+K03389+K03390+K14127+(K14126+K14128,K22516+K00125)': False}
请注意,实现正在拆分第一条规则。
这是我期望的以下输出:
{
'(K00925 K00625),K01895)':False,
'(K00193+K00197+K00194)':True,
'(K00577+K00578+K00579+K00580+K00581-K00582-K00583+K00584)':True,
'(K00399+K00401+K00402)':True,
'(K22480+K22481+K22482,K03388+K03389+K03390,K08264+K08265,K03388+K03389+K03390+K14127+(K14126+K14128,K22516+K00125)':False,
}
这是此复杂规则定义的信息流的图形表示:
这是一项艰难的任务。 我已经组装了一个递归解析器,但我必须停下来。 我将在这里留下代码作为不完整的片段,以显示我试图用什么替换该正则表达式,但没有完全实现您的逻辑。
这也很可能是一个糟糕的答案,因为它不尊重 python 的递归深度限制(默认为 1000),并且如果您需要深度递归,则需要重新设计。 如果你实现了 eval
,这
确实将问题分解为 python
evaluate_rule
可以直接处理的问题。 所以,我会把这个留给你考虑。
definition = '((K00925 K00625),K01895) (K00193+K00197+K00194) (K00577+K00578+K00579+K00580+K00581-K00582-K00583+K00584) (K00399+K00401+K00402) (K22480+K22481+K22482,K03388+K03389+K03390,K08264+K08265,K03388+K03389+K03390+K14127+(K14126+K14128,K22516+K00125))'
def scan_right(instr, start_index=0):
print("scanning...")
paren_count = 0
for ii, c in enumerate(instr[start_index:]):
if c == ')': paren_count -= 1
elif c == '(': paren_count += 1
# There's a paren match
if paren_count == 0:
return ii + start_index
raise IOError(f"Unmatched paren at character {start_index}")
def process(instr):
global FINAL_EVAL
if len(instr) <= 0: return instr
print(f"processing '{instr}'")
if instr[0] == ',': return " | " + process(instr[1:])
elif instr[0] == '+': return " & " + process(instr[1:])
elif instr[0] == ' ': return " & " + process(instr[1:]) #
index_open_paren = instr.find('(')
if index_open_paren >= 0:
# Recursive step, found
index_close_paren = scan_right(instr, index_open_paren)
return ( '(' + process(instr[index_open_paren+1:index_close_paren]) + ')'
+ process(instr[index_close_paren+1:])
)
else:
# Base case: No nested structure
eval_str = f"evaluate_rule('{instr}')"
print(f"generating {eval_str}")
return(eval_str)
result = process(definition)
print()
print(result)
result = """
((evaluate_rule('K00925 K00625')) | evaluate_rule('K01895')) & (evaluate_rule('K00193+K00197+K00194')) & (evaluate_rule('K00577+K00578+K00579+K00580+K00581-K00582-K00583+K00584')) & (evaluate_rule('K00399+K00401+K00402')) & ((evaluate_rule('K14126+K14128,K22516+K00125')))
"""