我为后缀表达式编写了分词器和递归解析器。我的代码如下:
import re
token_patterns = [
('OPERATOR', r'[+\-*/]'),
('NUMBER', r'\d+'),
('WHITESPACE', r'\s+'),
]
def tokenize(source_code):
tokens = []
source_code = source_code.strip()
while source_code:
matched = False
for token_type, pattern in token_patterns:
match = re.match(pattern, source_code)
if match:
value = match.group(0)
tokens.append((token_type, value))
source_code = source_code[len(value):].lstrip()
matched = True
break
if not matched:
raise ValueError(f"Invalid character in source code: {source_code[0]}")
return tokens
def parse_expression(tokens):
if not tokens:
return None
token_type, value = tokens.pop(0)
if token_type == 'NUMBER':
return int(value)
elif token_type == 'OPERATOR':
if value in ('+', '-', '*', '/'):
right = parse_expression(tokens)
left = parse_expression(tokens)
return (value, left, right)
else:
raise ValueError(f"Unexpected operator: {value}")
else:
raise ValueError(f"Unexpected token: {token_type}")
def evaluate_expression(expression):
if isinstance(expression, int):
return expression
elif isinstance(expression, tuple):
operator, left, right = expression
if operator == '+':
return evaluate_expression(left) + evaluate_expression(right)
elif operator == '-':
return evaluate_expression(left) - evaluate_expression(right)
elif operator == '*':
return evaluate_expression(left) * evaluate_expression(right)
elif operator == '/':
return evaluate_expression(left) / evaluate_expression(right)
else:
raise ValueError(f"Invalid expression: {expression}")
def main():
source_code = "2 3 4 * +"
tokens = tokenize(source_code)
parsed_expression = parse_expression(tokens)
print(f"Source code: {source_code}")
print(f"Parsed expression: {parsed_expression}")
result = evaluate_expression(parsed_expression)
print(f"Result: {result}")
if __name__ == "__main__":
main()
tokenize 函数的部分工作正常,给我:
[('NUMBER', '2'), ('NUMBER', '3'), ('NUMBER', '4'), ('OPERATOR', '*'), ('OPERATOR', '+')]
但我想回来
print(f"Parsed expression: {parsed_expression}")
类似这样的:
Parsed expression: ('+', 2, ('*', 3, 4))
但是它只打印 2。 另外,结果应该是 14,而我也得到了 2。我找不到错误。 有什么帮助吗?
作为参考,这里是 O'Reilly Media 出版的 “Python Cookbook” 书中的 递归下降解析器。
GitHub –writing_a_simple_recursive_descent_parser。
# example.py
#
# An example of writing a simple recursive descent parser
import re
import collections
# Token specification
NUM = r'(?P<NUM>\d+)'
PLUS = r'(?P<PLUS>\+)'
MINUS = r'(?P<MINUS>-)'
TIMES = r'(?P<TIMES>\*)'
DIVIDE = r'(?P<DIVIDE>/)'
LPAREN = r'(?P<LPAREN>\()'
RPAREN = r'(?P<RPAREN>\))'
WS = r'(?P<WS>\s+)'
master_pat = re.compile('|'.join([NUM, PLUS, MINUS, TIMES,
DIVIDE, LPAREN, RPAREN, WS]))
# Tokenizer
Token = collections.namedtuple('Token', ['type','value'])
def generate_tokens(text):
scanner = master_pat.scanner(text)
for m in iter(scanner.match, None):
tok = Token(m.lastgroup, m.group())
if tok.type != 'WS':
yield tok
# Parser
class ExpressionEvaluator:
'''
Implementation of a recursive descent parser. Each method
implements a single grammar rule. Use the ._accept() method
to test and accept the current lookahead token. Use the ._expect()
method to exactly match and discard the next token on on the input
(or raise a SyntaxError if it doesn't match).
'''
def parse(self,text):
self.tokens = generate_tokens(text)
self.tok = None # Last symbol consumed
self.nexttok = None # Next symbol tokenized
self._advance() # Load first lookahead token
return self.expr()
def _advance(self):
'Advance one token ahead'
self.tok, self.nexttok = self.nexttok, next(self.tokens, None)
def _accept(self,toktype):
'Test and consume the next token if it matches toktype'
if self.nexttok and self.nexttok.type == toktype:
self._advance()
return True
else:
return False
def _expect(self,toktype):
'Consume next token if it matches toktype or raise SyntaxError'
if not self._accept(toktype):
raise SyntaxError('Expected ' + toktype)
# Grammar rules follow
def expr(self):
"expression ::= term { ('+'|'-') term }*"
exprval = self.term()
while self._accept('PLUS') or self._accept('MINUS'):
op = self.tok.type
right = self.term()
if op == 'PLUS':
exprval += right
elif op == 'MINUS':
exprval -= right
return exprval
def term(self):
"term ::= factor { ('*'|'/') factor }*"
termval = self.factor()
while self._accept('TIMES') or self._accept('DIVIDE'):
op = self.tok.type
right = self.factor()
if op == 'TIMES':
termval *= right
elif op == 'DIVIDE':
termval /= right
return termval
def factor(self):
"factor ::= NUM | ( expr )"
if self._accept('NUM'):
return int(self.tok.value)
elif self._accept('LPAREN'):
exprval = self.expr()
self._expect('RPAREN')
return exprval
else:
raise SyntaxError('Expected NUMBER or LPAREN')
if __name__ == '__main__':
e = ExpressionEvaluator()
print(e.parse('2'))
print(e.parse('2 + 3'))
print(e.parse('2 + 3 * 4'))
print(e.parse('2 + (3 + 4) * 5'))
# Example of building trees
class ExpressionTreeBuilder(ExpressionEvaluator):
def expr(self):
"expression ::= term { ('+'|'-') term }"
exprval = self.term()
while self._accept('PLUS') or self._accept('MINUS'):
op = self.tok.type
right = self.term()
if op == 'PLUS':
exprval = ('+', exprval, right)
elif op == 'MINUS':
exprval = ('-', exprval, right)
return exprval
def term(self):
"term ::= factor { ('*'|'/') factor }"
termval = self.factor()
while self._accept('TIMES') or self._accept('DIVIDE'):
op = self.tok.type
right = self.factor()
if op == 'TIMES':
termval = ('*', termval, right)
elif op == 'DIVIDE':
termval = ('/', termval, right)
return termval
def factor(self):
'factor ::= NUM | ( expr )'
if self._accept('NUM'):
return int(self.tok.value)
elif self._accept('LPAREN'):
exprval = self.expr()
self._expect('RPAREN')
return exprval
else:
raise SyntaxError('Expected NUMBER or LPAREN')
if __name__ == '__main__':
e = ExpressionTreeBuilder()
print(e.parse('2 + 3'))
print(e.parse('2 + 3 * 4'))
print(e.parse('2 + (3 + 4) * 5'))
print(e.parse('2 + 3 + 4'))