说明:以下代码取自一个更大的解析器,为了说明我的问题已做了简化。
我遇到的情况是:右侧(rhs)的值可以有 3 种以标识符开头的形式。这里的 `.` 和 `:` 只是示例,代表那些以标识符开头、之后以某种方式有所区别的规则;两个内容相同的结构体也只是为了这个示例而写的。
因此输入可以只是 `id`、`id.id` 或 `id:id`。
问题在于 `id` 规则排在最前面,它先匹配成功,从而决定了此处接下来必须是什么。我是否需要把这些规则合并成一条,才能得到不同的输出?到目前为止,我一直以为这 3 条以 `id` 开头的规则会被当作某种规则组合来处理,但看来并非如此。
// #define BOOST_SPIRIT_DEBUG
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <variant>
namespace qi = boost::spirit::qi;
namespace Ast {
    /// An identifier: a std::string under a distinct type name.
    struct Identifier : std::string {};

    /// A pair of identifiers, stored in members `a` and `b`.
    struct QualifiedId1 {
        Identifier a;
        Identifier b;
    };

    /// Same layout as QualifiedId1, kept as a separate type so the variant can tell the two apart.
    struct QualifiedId2 {
        Identifier a;
        Identifier b;
    };

    /// Holds exactly one of the three node types; index 0/1/2 follows the order listed here.
    using Variants = std::variant<Identifier, QualifiedId1, QualifiedId2>;
} // namespace Ast
// Adapt both two-member structs as Boost.Fusion sequences over their members a and b.
BOOST_FUSION_ADAPT_STRUCT( Ast::QualifiedId1, a, b )
BOOST_FUSION_ADAPT_STRUCT( Ast::QualifiedId2, a, b )
// Iterator type shared by all rules and by run_tests; inputs are viewed through std::string_view.
using It = std::string_view::const_iterator;
/// Parses every string in `tests` with `rule_` and prints one report per input to std::cout.
///
/// The rule is wrapped as `qi::eps > rule_ > qi::eoi`, so the whole input has to be matched;
/// any mismatch after the leading `eps` raises qi::expectation_failure, which is caught here
/// and reported with the line, the column and a caret under the failure position.
///
/// @tparam Attr    attribute type of the rule; must be default-constructible.
/// @tparam Skipper skipper type of the rule; `qi::unused_type` selects `qi::parse`, any other
///                 type selects `qi::phrase_parse` with a default-constructed skipper.
/// @param rule_    rule under test.
/// @param tests    inputs, each parsed independently.
template <typename Attr, typename Skipper>
void run_tests( qi::rule<It, Attr(), Skipper> const& rule_, std::vector<std::string> const& tests )
{
    std::cout << "======[ " << boost::core::demangle( typeid( Attr ).name() ) << " ]======" << std::endl;
    for( std::string_view test : tests )
    {
        It f = test.begin();
        It l = test.end();
        try
        {
            std::cout << "Parsing: " << std::quoted( test );
            Attr v;
            bool ok;
            if constexpr( std::is_same_v<Skipper, qi::unused_type> )
                ok = qi::parse( f, l, qi::eps > rule_ > qi::eoi, v );
            else
                ok = qi::phrase_parse( f, l, qi::eps > rule_ > qi::eoi, Skipper{}, v );
            std::cout << ( ok ? "OK" : "FAIL" ) << " Remaining: " << std::quoted( std::string( f, l ) ) << "\n";
        }
        catch( qi::expectation_failure<It> const& ef )
        {
            // Offset of the failure from the start of the input (not from the parse cursor `f`).
            auto const pos = static_cast<std::size_t>( ef.first - test.begin() );
            // Search strictly before `pos`: if the failure sits on a line break itself, that break
            // ends the current line and must not be taken as its start (which would make the
            // column underflow). npos + 1 wraps to 0, i.e. "no earlier line break".
            std::size_t const bol = pos == 0 ? std::size_t{ 0 } : test.find_last_of( "\r\n", pos - 1 ) + 1;
            auto const preceding = test.substr( 0, bol );
            auto const line = std::count( preceding.begin(), preceding.end(), '\n' ) + 1;
            auto const eol = test.find_first_of( "\r\n", pos );
            std::size_t const col = pos - bol; // bol <= pos always holds here
            std::cout << " -> EXPECTED " << ef.what_ << " in line:" << line << " col:" << col << "\n"
                      << " " << test.substr( bol, eol - bol ) << "\n" // substr clamps when eol is npos
                      << " " << std::setw( static_cast<int>( col ) ) << "" << "^--- here\n";
        }
        std::cout << "--------------------\n";
    }
}
/// Builds the grammar from the question and runs it on the three sample inputs.
int main()
{
    // Identifier without a skipper: a letter or underscore, then letters, digits or underscores.
    qi::rule<It, Ast::Identifier()> id = qi::char_( "a-zA-Z_" ) >> *qi::char_( "a-zA-Z0-9_" );
    // `id . id` and `id : id`. `>` is an expectation point: once `id` has matched, a missing
    // separator throws qi::expectation_failure instead of letting the parser backtrack.
    qi::rule<It, Ast::QualifiedId1(), qi::space_type> qualId1 = id > "." > id;
    qi::rule<It, Ast::QualifiedId2(), qi::space_type> qualId2 = id > ":" > id;
    // One alternative per member of Ast::Variants. Alternatives are tried left to right,
    // so the plain `id` listed first matches before either qualified form is attempted.
    qi::rule<It, Ast::Variants(), qi::space_type> variants = ( id ) | ( qualId1 ) | ( qualId2 );
    run_tests( variants, { "abc", "abc.def", "abc:def" } );
}
输出如下:`id` 规则最先被匹配,随后期望点(expectation point)失败:
======[ class std::variant<struct Ast::Identifier,struct Ast::QualifiedId1,struct Ast::QualifiedId2> ]======
Parsing: "abc"OK Remaining: ""
--------------------
Parsing: "abc.def" -> EXPECTED <eoi> in line:1 col:3
abc.def
^--- here
--------------------
Parsing: "abc:def" -> EXPECTED <eoi> in line:1 col:3
abc:def
^--- here
--------------------
我尝试过把这 3 条规则合并成一条:
// Attempted single-rule merge: an identifier followed by zero or more ".id" / ":id" suffixes.
qi::rule<It, Ast::Variants(), qi::space_type> variants2 = id > *( ( "." > id ) | ( ":" > id ) );
这样可以解析,但 3 种输入在变体中得到的都只是 `id` 类型。另外,分隔符后面的 `id` 只是一个例子,实际中后面通常跟着不同的规则(比如只允许数字,或固定的字符串等),所以区别并不仅仅在于 `.` 和 `:`。
PEG 文法按从左到右的顺序贪婪匹配:`(a | ab)` 永远不会匹配 `ab`,因为第一个备选 `a` 已经匹配成功。
此外,期望点一旦失败,会让外围的规则也一并失败,而不仅仅是包含它的那个表达式。如果需要回溯,就不应该使用期望点。
修复这些问题:
// #define BOOST_SPIRIT_DEBUG
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <variant>
namespace qi = boost::spirit::qi;
namespace Ast {
    /// An identifier: a std::string under a distinct type name.
    /// Constructors and assignment operators are inherited from std::string.
    struct Identifier : std::string {
        using std::string::string;
        using std::string::operator=;
    };

    /// A pair of identifiers, stored in members `a` and `b`.
    struct QualifiedId1 {
        Identifier a;
        Identifier b;
    };

    /// Same layout as QualifiedId1, kept as a separate type so the variant can tell the two apart.
    struct QualifiedId2 {
        Identifier a;
        Identifier b;
    };

    /// Holds exactly one of the three node types; index 0/1/2 follows the order listed here.
    using Variants = std::variant<Identifier, QualifiedId1, QualifiedId2>;
} // namespace Ast
// Adapt both two-member structs as Boost.Fusion sequences over their members a and b.
BOOST_FUSION_ADAPT_STRUCT(Ast::QualifiedId1, a, b)
BOOST_FUSION_ADAPT_STRUCT(Ast::QualifiedId2, a, b)
// Iterator type shared by all rules and by run_tests; inputs are viewed through std::string_view.
using It = std::string_view::const_iterator;
/// Parses every string in `tests` with `rule_` and prints one report per input to std::cout.
///
/// The rule is wrapped as `qi::eps > rule_ > qi::eoi`, so the whole input has to be matched;
/// any mismatch after the leading `eps` raises qi::expectation_failure, which is caught here
/// and reported with the line, the column and a caret under the failure position.
///
/// @tparam Attr    attribute type of the rule; must be default-constructible and provide
///                 `index()` (it is printed as the variant number).
/// @tparam Skipper skipper type of the rule; `qi::unused_type` selects `parse`, any other
///                 type selects `phrase_parse` with a default-constructed skipper.
/// @param rule_    rule under test.
/// @param tests    inputs, each parsed independently.
template <typename Attr, typename Skipper>
void run_tests(qi::rule<It, Attr(), Skipper> const& rule_, std::vector<std::string> const& tests) {
    using boost::core::demangle;
    std::cout << "======[ " << demangle(typeid(Attr).name()) << " ]======" << std::endl;
    for (std::string_view test : tests) {
        It f = test.begin();
        It l = test.end();
        try {
            std::cout << "Parsing: " << std::quoted(test) << " -> ";
            Attr v;
            bool ok;
            if constexpr (std::is_same_v<Skipper, qi::unused_type>)
                ok = parse(f, l, qi::eps > rule_ > qi::eoi, v);
            else
                ok = phrase_parse(f, l, qi::eps > rule_ > qi::eoi, Skipper{}, v);
            std::cout << (ok ? "OK" : "FAIL")                              //
                      << " Remaining: " << std::quoted(std::string(f, l)) //
                      << " Variant #" << v.index() << "\n";
        } catch (qi::expectation_failure<It> const& ef) {
            // Offset of the failure from the start of the input (not from the parse cursor `f`).
            auto const pos = static_cast<std::size_t>(ef.first - test.begin());
            // Search strictly before `pos`: if the failure sits on a line break itself, that break
            // ends the current line and must not be taken as its start (which would make the
            // column underflow). npos + 1 wraps to 0, i.e. "no earlier line break".
            std::size_t const bol = pos == 0 ? std::size_t{0} : test.find_last_of("\r\n", pos - 1) + 1;
            auto const preceding = test.substr(0, bol);
            auto const line = std::count(preceding.begin(), preceding.end(), '\n') + 1;
            auto const eol = test.find_first_of("\r\n", pos);
            std::size_t const col = pos - bol; // bol <= pos always holds here
            std::cout << "EXPECTED " << ef.what_ << " in line:" << line << " col:" << col << "\n"
                      << " " << test.substr(bol, eol - bol) << "\n" // substr clamps when eol is npos
                      << " " << std::setw(static_cast<int>(col)) << "" << "^--- here\n";
        }
    }
}
// Shorthand for a rule over It that exposes attribute Attr and skips whitespace (qi::space_type).
template <typename Attr> using Rule = qi::rule<It, Attr(), qi::space_type>;
// Builds the corrected grammar and runs it on the three sample inputs.
int main() {
// Identifier without a skipper: a letter or underscore, then letters, digits or underscores.
qi::rule<It, Ast::Identifier()> id = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z0-9_");
// The leading id is joined with `>>`, so a missing separator is an ordinary failure and the
// next alternative can be tried; the expectation point `>` applies only after the separator.
Rule<Ast::QualifiedId1> qualId1 = id >> ("." > id);
Rule<Ast::QualifiedId2> qualId2 = id >> (":" > id);
// Alternatives are tried left to right: the longer qualified forms come before the bare id.
Rule<Ast::Variants> variants = qualId1 | qualId2 | id;
run_tests( variants, { "abc", "abc.def", "abc:def" } );
}
输出:
======[ std::variant<Ast::Identifier, Ast::QualifiedId1, Ast::QualifiedId2> ]======
Parsing: "abc" -> OK Remaining: "" Variant #0
Parsing: "abc.def" -> OK Remaining: "" Variant #1
Parsing: "abc:def" -> OK Remaining: "" Variant #2
有时可以加入前瞻断言(lookahead assertion)来实现更严格的期望:
// `&qualId2` is a look-ahead predicate: it has to match at this position but consumes no input.
keywordXYZ >> (&qualId2 >> id) // will not parse qualid2
一般来说,如果 `b` 匹配的是 `a` 的一个子集,那么像 `(a | bc)` 这样的棘手情况可以改写成 `(a - b | bc)`,或者等价地写成 `(!b >> a | bc)`。