From e37f8efd6e9840d9958996ac0cf05f8e449c6020 Mon Sep 17 00:00:00 2001 From: Ryan Date: Mon, 30 Dec 2024 14:30:15 +0100 Subject: [PATCH] Relax parsing rules and fix minor issues --- code/query/expression.hxx | 4 +- code/query/logical-and.hxx | 2 +- code/query/logical-or.hxx | 2 +- code/query/match.cxx | 8 ++- code/query/parse.cxx | 3 +- code/query/parse.hxx | 2 +- code/query/query.cxx | 14 ++++-- code/query/query.hxx | 6 +-- code/query/syntactical-analyzer.cxx | 77 ++++++++++++++++++++++------- code/query/syntactical-analyzer.hxx | 2 +- code/query/token.cxx | 52 +++++++++++++++++++ code/query/token.hxx | 5 ++ 12 files changed, 141 insertions(+), 36 deletions(-) diff --git a/code/query/expression.hxx b/code/query/expression.hxx index 73adad3..2ab8b78 100644 --- a/code/query/expression.hxx +++ b/code/query/expression.hxx @@ -23,11 +23,11 @@ public: friend void - accept(expression_t const& e, visitor_t& v); + accept(expression_t const&, visitor_t&); friend string - to_string(expression_t const& e); + to_string(expression_t const&); private: struct abstract_t diff --git a/code/query/logical-and.hxx b/code/query/logical-and.hxx index 7180c78..6088e6f 100644 --- a/code/query/logical-and.hxx +++ b/code/query/logical-and.hxx @@ -32,7 +32,7 @@ namespace code::query }; string - to_string(logical_and_t const); + to_string(logical_and_t const&); } // namespace code::query diff --git a/code/query/logical-or.hxx b/code/query/logical-or.hxx index 603f3ba..2e8cf7d 100644 --- a/code/query/logical-or.hxx +++ b/code/query/logical-or.hxx @@ -32,7 +32,7 @@ namespace code::query }; string - to_string(logical_or_t const); + to_string(logical_or_t const&); } // namespace code::query diff --git a/code/query/match.cxx b/code/query/match.cxx index c9b6bcc..b999647 100644 --- a/code/query/match.cxx +++ b/code/query/match.cxx @@ -25,10 +25,14 @@ namespace code::query public: static bool - match(expression_t const& e, predicate_t const& p) + match(optional const& e, predicate_t const& p) { matcher_t m{p}; - accept(e, m); + + if (e) { + accept(*e, m); + } + return m.result; } diff --git a/code/query/parse.cxx b/code/query/parse.cxx index beb2a10..411951a 100644 --- a/code/query/parse.cxx +++ b/code/query/parse.cxx @@ -6,11 +6,10 @@ namespace code::query { - optional + query_t try_parse(string const& q, parse_context_t& context) { string_lexical_analyzer_t lexer{q.begin(), q.end()}; - return syntactical_analyzer_t{lexer, context}.try_parse(); } diff --git a/code/query/parse.hxx b/code/query/parse.hxx index bab6f99..c956377 100644 --- a/code/query/parse.hxx +++ b/code/query/parse.hxx @@ -8,7 +8,7 @@ namespace code::query { - optional + query_t try_parse(string const&, parse_context_t&); } // namespace code::query diff --git a/code/query/query.cxx b/code/query/query.cxx index 178ba87..666bb33 100644 --- a/code/query/query.cxx +++ b/code/query/query.cxx @@ -4,7 +4,7 @@ namespace code::query { query_t:: - query_t(expression_t e, + query_t(optional e, vector warnings, vector errors) : expr_{move(e)}, @@ -12,7 +12,7 @@ namespace code::query errors_{move(errors)} {} - expression_t const& + optional const& query_t:: expr() const { @@ -36,13 +36,19 @@ namespace code::query void accept(query_t const& q, visitor_t& v) { - accept(q.expr(), v); + if (q.expr()) { + accept(*q.expr(), v); + } } string to_string(query_t const& q) { - return to_string(q.expr()); + if (q.expr()) { + return to_string(*q.expr()); + } + + return string{}; } } // namespace code::query diff --git a/code/query/query.hxx b/code/query/query.hxx index ff67d4c..77b4c89 100644 --- a/code/query/query.hxx +++ b/code/query/query.hxx @@ -12,11 +12,11 @@ namespace code::query class query_t { public: - query_t(expression_t e, + query_t(optional e, vector warnings, vector errors); - expression_t const& + optional const& expr() const; vector const& @@ -26,7 +26,7 @@ namespace code::query errors() const; private: - expression_t expr_; + optional expr_; vector warnings_; vector errors_; diff --git a/code/query/syntactical-analyzer.cxx b/code/query/syntactical-analyzer.cxx index 8ce2ec0..ffce29a 100644 --- a/code/query/syntactical-analyzer.cxx +++ b/code/query/syntactical-analyzer.cxx @@ -31,7 +31,7 @@ namespace code::query return context_; } - optional + query_t syntactical_analyzer_t:: try_parse() { @@ -43,15 +43,13 @@ namespace code::query if (last.type() != token_type_t::end) { context().report_warning({{}, "trailing token at end of query"}); } - - return query_t{ - *move(expr), - context().warnings(), - context().errors() - }; } - return nullopt; + return query_t{ + move(expr), + context().warnings(), + context().errors() + }; } optional @@ -63,7 +61,11 @@ namespace code::query for (;;) { auto t = lexer().peek(); - if (t.type() == token_type_t::logical_not) { + if (t.type() == token_type_t::end) { + break; + } + + else if (t.type() == token_type_t::logical_not) { lexer().consume(); auto rhs = try_parse_primary_expression(); @@ -75,7 +77,7 @@ namespace code::query if (expr) { expr = logical_and_t{ - source_location_t{}, *expr, logical_not_t{{}, *rhs} + {}, *expr, logical_not_t{{}, *rhs} }; } else { @@ -91,6 +93,12 @@ namespace code::query if (lexer().peek().type() == token_type_t::colon) { lexer().consume(); + // consume trailing colons as well. + // + while (lexer().peek().type() == token_type_t::colon) { + lexer().consume(); + } + auto identifier = *t.value(); // Next expect a term or quoted term. @@ -100,7 +108,8 @@ namespace code::query if (t.type() != token_type_t::simple_term && t.type() != token_type_t::quoted_term) { context().report_warning({{}, "expected simple-term or quoted-term after tag" }); - break; + lexer().consume(); + continue; } lexer().consume(); @@ -114,7 +123,7 @@ namespace code::query }; if (expr) { - expr = logical_and_t{ + expr = logical_or_t{ {}, *expr, tag_t{{}, move(identifier), move(term)} }; } @@ -123,7 +132,7 @@ namespace code::query } } else if (expr) { - expr = logical_and_t{ + expr = logical_or_t{ {}, *expr, term_t{{}, term_t::simple, *t.value()} }; } @@ -134,7 +143,7 @@ namespace code::query else if (t.type() == token_type_t::quoted_term) { if (expr) { - expr = logical_and_t{ + expr = logical_or_t{ {}, *expr, term_t{{}, term_t::quoted, *t.value()} }; } @@ -159,23 +168,24 @@ namespace code::query if (!next_expr) { context().report_warning({{}, "expected expression inside parenthesis"}); - break; + lexer().consume(); + continue; } next_expr = parenthesized_t{{}, *next_expr}; if (expr) { - expr = logical_and_t{{}, *expr, *next_expr}; + expr = logical_or_t{{}, *expr, *next_expr}; } else { expr = next_expr; } t = lexer().peek(); - if (t.type() != token_type_t::close_parens) { context().report_warning({{}, "expected end parenthesis"}); - break; + lexer().consume(); + continue; } lexer().consume(); @@ -195,8 +205,9 @@ namespace code::query { auto lhs = try_parse_primary_expression(); - if (!lhs) + if (!lhs) { return nullopt; + } while (lexer().peek().type() == token_type_t::logical_and) { lexer().consume(); @@ -239,6 +250,34 @@ namespace code::query syntactical_analyzer_t:: try_parse_expression() { + auto invalid = [](token_t const& token) + { + if (token.type() == token_type_t::end) { + return false; + } + + if (token.type() == token_type_t::simple_term) { + return false; + } + + if (token.type() == token_type_t::quoted_term) { + return false; + } + + if (token.type() == token_type_t::open_parens) { + return false; + } + + return true; + }; + + // filter leading invalid tokens. + // + while (invalid(lexer().peek())) { + context().report_warning({{}, "invalid leading token"}); + lexer().consume(); + } + return try_parse_logical_or(); } diff --git a/code/query/syntactical-analyzer.hxx b/code/query/syntactical-analyzer.hxx index b4d7c11..76162d8 100644 --- a/code/query/syntactical-analyzer.hxx +++ b/code/query/syntactical-analyzer.hxx @@ -21,7 +21,7 @@ namespace code::query parse_context_t& context(); - optional + query_t try_parse(); private: diff --git a/code/query/token.cxx b/code/query/token.cxx index bd3d90f..00ae986 100644 --- a/code/query/token.cxx +++ b/code/query/token.cxx @@ -23,4 +23,56 @@ namespace code::query return value_; } + std::ostream& + operator<<(std::ostream& o, token_t const& token) + { + switch (token.type()) { + case token_type_t::end: + o << "end"; + break; + + case token_type_t::simple_term: + o << "simple-term"; + break; + + case token_type_t::quoted_term: + o << "quoted-term"; + break; + + case token_type_t::colon: + o << "colon"; + break; + + case token_type_t::logical_and: + o << "logical-and"; + break; + + case token_type_t::logical_not: + o << "logical-not"; + break; + + case token_type_t::logical_or: + o << "logical-or"; + break; + + case token_type_t::open_parens: + o << "open-parens"; + break; + + case token_type_t::close_parens: + o << "close-parens"; + break; + + + } + + if (auto v = token.value(); v) { + o << ": " << *v; + } + + o << '\n'; + + return o; + } + } // namespace code::query diff --git a/code/query/token.hxx b/code/query/token.hxx index 403488d..ba1eb55 100644 --- a/code/query/token.hxx +++ b/code/query/token.hxx @@ -3,6 +3,8 @@ #include +#include + namespace code::query { @@ -42,6 +44,9 @@ namespace code::query }; + std::ostream& + operator<<(std::ostream&, token_t const&); + } // namespace code::query #endif