Skip to main content

rustledger_query/
parser.rs

1//! BQL Parser implementation.
2//!
3//! Uses chumsky for parser combinators.
4
5use chumsky::prelude::*;
6use rust_decimal::Decimal;
7use std::str::FromStr;
8
9use crate::ast::{
10    BalancesQuery, BinaryOperator, ColumnDef, CreateTableStmt, Expr, FromClause, FunctionCall,
11    InsertSource, InsertStmt, JournalQuery, Literal, OrderSpec, PrintQuery, Query, SelectQuery,
12    SortDirection, Target, UnaryOperator, WindowFunction, WindowSpec,
13};
14use crate::error::{ParseError, ParseErrorKind};
15use rustledger_core::NaiveDate;
16
17type ParserInput<'a> = &'a str;
18type ParserExtra<'a> = extra::Err<Rich<'a, char>>;
19
20/// Helper enum for parsing comparison suffix (BETWEEN, IN, or binary comparison).
21enum ComparisonSuffix {
22    Between(Expr, Expr),
23    Binary(BinaryOperator, Expr),
24    /// IN with right-hand side (set literal or expression).
25    In(Expr),
26    /// NOT IN with right-hand side (set literal or expression).
27    NotIn(Expr),
28}
29
30/// Maximum nesting depth of parenthesized expressions / subqueries the
31/// parser accepts. The recursive (chumsky) grammar descends one stack
32/// frame per open paren, so without a bound a deeply-nested input (e.g.
33/// `(((((…`) overflows the stack and parses in super-linear time — a
34/// denial-of-service surfaced by the query fuzzer's slow-unit corpus
35/// (a ~2 KB input of
36/// 1023 nested parens took >10 s and overflowed an 8 MB stack). Real
37/// queries nest only a handful deep; 128 is far above any legitimate
38/// query yet shallow enough to stay fast and stack-safe.
39const MAX_NESTING_DEPTH: usize = 128;
40
41/// Byte offset at which parenthesis nesting first exceeds
42/// [`MAX_NESTING_DEPTH`], or `None` if the input stays within the bound.
43///
44/// Parens inside string literals (`"..."` / `'...'`, with `\` escapes)
45/// are skipped so they don't count — matching the grammar, which treats
46/// them as opaque string bytes rather than expression delimiters.
47fn nesting_exceeds_limit(source: &str) -> Option<usize> {
48    let mut depth: usize = 0;
49    let mut chars = source.char_indices();
50    while let Some((i, c)) = chars.next() {
51        match c {
52            '"' | '\'' => {
53                // Consume the string body so its parens don't count.
54                while let Some((_, sc)) = chars.next() {
55                    if sc == '\\' {
56                        chars.next(); // skip the escaped char
57                    } else if sc == c {
58                        break;
59                    }
60                }
61            }
62            '(' => {
63                depth += 1;
64                if depth > MAX_NESTING_DEPTH {
65                    return Some(i);
66                }
67            }
68            ')' => depth = depth.saturating_sub(1),
69            _ => {}
70        }
71    }
72    None
73}
74
75/// Parse a BQL query string.
76///
77/// # Errors
78///
79/// Returns a `ParseError` if the query string is malformed, or if
80/// parenthesis nesting exceeds the internal nesting limit (rejected up
81/// front to bound parse time and stack depth).
82pub fn parse(source: &str) -> Result<Query, ParseError> {
83    if let Some(offset) = nesting_exceeds_limit(source) {
84        return Err(ParseError::new(
85            ParseErrorKind::SyntaxError(format!(
86                "expression nesting too deep (exceeds maximum of {MAX_NESTING_DEPTH})"
87            )),
88            offset,
89        ));
90    }
91
92    let (result, errs) = query_parser()
93        .then_ignore(ws())
94        .then_ignore(end())
95        .parse(source)
96        .into_output_errors();
97
98    if let Some(query) = result {
99        Ok(query)
100    } else {
101        let err = errs.first().map(|e| {
102            let kind = if e.found().is_none() {
103                ParseErrorKind::UnexpectedEof
104            } else {
105                ParseErrorKind::SyntaxError(e.to_string())
106            };
107            ParseError::new(kind, e.span().start)
108        });
109        Err(err.unwrap_or_else(|| ParseError::new(ParseErrorKind::UnexpectedEof, 0)))
110    }
111}
112
113/// Parse whitespace (spaces, tabs, newlines).
114fn ws<'a>() -> impl Parser<'a, ParserInput<'a>, (), ParserExtra<'a>> + Clone {
115    one_of(" \t\r\n").repeated().ignored()
116}
117
118/// Parse required whitespace.
119fn ws1<'a>() -> impl Parser<'a, ParserInput<'a>, (), ParserExtra<'a>> + Clone {
120    one_of(" \t\r\n").repeated().at_least(1).ignored()
121}
122
123/// Case-insensitive keyword parser.
124fn kw<'a>(keyword: &'static str) -> impl Parser<'a, ParserInput<'a>, (), ParserExtra<'a>> + Clone {
125    text::ident().try_map(move |s: &str, span| {
126        if s.eq_ignore_ascii_case(keyword) {
127            Ok(())
128        } else {
129            Err(Rich::custom(span, format!("expected keyword '{keyword}'")))
130        }
131    })
132}
133
134/// Parse digits.
135fn digits<'a>() -> impl Parser<'a, ParserInput<'a>, &'a str, ParserExtra<'a>> + Clone {
136    one_of("0123456789").repeated().at_least(1).to_slice()
137}
138
139/// Parse the main query.
140fn query_parser<'a>() -> impl Parser<'a, ParserInput<'a>, Query, ParserExtra<'a>> {
141    ws().ignore_then(choice((
142        create_table_stmt().map(Query::CreateTable),
143        insert_stmt().map(Query::Insert),
144        select_query().map(|sq| Query::Select(Box::new(sq))),
145        journal_query().map(Query::Journal),
146        balances_query().map(Query::Balances),
147        print_query().map(Query::Print),
148    )))
149    .then_ignore(ws())
150    .then_ignore(just(';').or_not())
151}
152
153/// Parse a SELECT query with optional subquery support.
154fn select_query<'a>() -> impl Parser<'a, ParserInput<'a>, SelectQuery, ParserExtra<'a>> {
155    recursive(|select_parser| {
156        // Subquery in FROM clause: FROM (SELECT ...)
157        let subquery_from = ws1()
158            .ignore_then(kw("FROM"))
159            .ignore_then(ws1())
160            .ignore_then(just('('))
161            .ignore_then(ws())
162            .ignore_then(select_parser)
163            .then_ignore(ws())
164            .then_ignore(just(')'))
165            .map(|sq| Some(FromClause::from_subquery(sq)));
166
167        // Table name FROM clause: FROM tablename (where tablename is not a keyword)
168        // A table name is an identifier followed by WHERE/GROUP/ORDER/HAVING/LIMIT/PIVOT or end
169        // Supports system tables like #prices, #entries
170        let table_from = ws1()
171            .ignore_then(kw("FROM"))
172            .ignore_then(ws1())
173            .ignore_then(table_identifier().try_map(|name, span| {
174                // Check if this looks like a table name (uppercase convention or doesn't look like account)
175                // Table names should not contain ':' which accounts have
176                // System tables starting with '#' are always valid
177                if !name.starts_with('#') && name.contains(':') {
178                    Err(Rich::custom(
179                        span,
180                        "table names cannot contain ':' - this looks like an account filter expression",
181                    ))
182                } else {
183                    Ok(name)
184                }
185            }))
186            .then_ignore(
187                // Must be followed by WHERE, GROUP, ORDER, HAVING, LIMIT, PIVOT, or end
188                ws().then(choice((
189                    kw("WHERE").ignored(),
190                    kw("GROUP").ignored(),
191                    kw("ORDER").ignored(),
192                    kw("HAVING").ignored(),
193                    kw("LIMIT").ignored(),
194                    kw("PIVOT").ignored(),
195                    end().ignored(),
196                )))
197                .rewind(),
198            )
199            .map(|name| Some(FromClause::from_table(name)));
200
201        // Regular FROM clause
202        let regular_from = from_clause().map(Some);
203
204        kw("SELECT")
205            .ignore_then(ws1())
206            .ignore_then(
207                kw("DISTINCT")
208                    .then_ignore(ws())
209                    .or_not()
210                    .map(|d| d.is_some()),
211            )
212            .then(targets())
213            .then(
214                subquery_from
215                    .or(table_from)
216                    .or(regular_from)
217                    .or_not()
218                    .map(std::option::Option::flatten),
219            )
220            .then(where_clause().or_not())
221            .then(group_by_clause().or_not())
222            .then(having_clause().or_not())
223            // Clause order matches bean-query (#1034): ORDER BY before
224            // PIVOT BY. Pre-#1034 rledger had PIVOT BY before ORDER BY,
225            // which is bean-query-incompatible — flipping the order here
226            // gives upstream parity for queries that use both.
227            .then(order_by_clause().or_not())
228            .then(pivot_by_clause().or_not())
229            .then(limit_clause().or_not())
230            .map(
231                |(
232                    (
233                        (
234                            (((((distinct, targets), from), where_clause), group_by), having),
235                            order_by,
236                        ),
237                        pivot_by,
238                    ),
239                    limit,
240                )| {
241                    SelectQuery {
242                        distinct,
243                        targets,
244                        from,
245                        where_clause,
246                        group_by,
247                        having,
248                        pivot_by,
249                        order_by,
250                        limit,
251                    }
252                },
253            )
254    })
255}
256
257/// Parse FROM clause.
258fn from_clause<'a>() -> impl Parser<'a, ParserInput<'a>, FromClause, ParserExtra<'a>> + Clone {
259    ws1()
260        .ignore_then(kw("FROM"))
261        .ignore_then(ws1())
262        .ignore_then(from_modifiers())
263}
264
265/// Parse target expressions.
266fn targets<'a>() -> impl Parser<'a, ParserInput<'a>, Vec<Target>, ParserExtra<'a>> + Clone {
267    target()
268        .separated_by(ws().then(just(',')).then(ws()))
269        .at_least(1)
270        .collect()
271}
272
273/// Parse a single target.
274fn target<'a>() -> impl Parser<'a, ParserInput<'a>, Target, ParserExtra<'a>> + Clone {
275    expr()
276        .then(
277            ws1()
278                .ignore_then(kw("AS"))
279                .ignore_then(ws1())
280                .ignore_then(identifier())
281                .or_not(),
282        )
283        .map(|(expr, alias)| Target { expr, alias })
284}
285
286/// Parse FROM modifiers (OPEN ON, CLOSE ON, CLEAR, filter).
287fn from_modifiers<'a>() -> impl Parser<'a, ParserInput<'a>, FromClause, ParserExtra<'a>> + Clone {
288    let open_on = kw("OPEN")
289        .ignore_then(ws1())
290        .ignore_then(kw("ON"))
291        .ignore_then(ws1())
292        .ignore_then(date_literal())
293        .then_ignore(ws());
294
295    let close_on = kw("CLOSE")
296        .ignore_then(ws().then(kw("ON")).then(ws()).or_not())
297        .ignore_then(date_literal())
298        .then_ignore(ws());
299
300    let clear = kw("CLEAR").then_ignore(ws());
301
302    // Parse modifiers in order: OPEN ON, CLOSE ON, CLEAR, filter
303    // Or just a table name for user-created tables
304    open_on
305        .or_not()
306        .then(close_on.or_not())
307        .then(clear.or_not().map(|c| c.is_some()))
308        .then(from_filter().or_not())
309        .map(|(((open_on, close_on), clear), filter)| FromClause {
310            open_on,
311            close_on,
312            clear,
313            filter,
314            subquery: None,
315            table_name: None,
316        })
317}
318
319/// Parse FROM filter expression (predicates).
320fn from_filter<'a>() -> impl Parser<'a, ParserInput<'a>, Expr, ParserExtra<'a>> + Clone {
321    expr()
322}
323
324/// Parse WHERE clause.
325fn where_clause<'a>() -> impl Parser<'a, ParserInput<'a>, Expr, ParserExtra<'a>> + Clone {
326    ws1()
327        .ignore_then(kw("WHERE"))
328        .ignore_then(ws1())
329        .ignore_then(expr())
330}
331
332/// Parse GROUP BY clause.
333fn group_by_clause<'a>() -> impl Parser<'a, ParserInput<'a>, Vec<Expr>, ParserExtra<'a>> + Clone {
334    ws1()
335        .ignore_then(kw("GROUP"))
336        .ignore_then(ws1())
337        .ignore_then(kw("BY"))
338        .ignore_then(ws1())
339        .ignore_then(
340            expr()
341                .separated_by(ws().then(just(',')).then(ws()))
342                .at_least(1)
343                .collect(),
344        )
345}
346
347/// Parse HAVING clause (filter on aggregated results).
348fn having_clause<'a>() -> impl Parser<'a, ParserInput<'a>, Expr, ParserExtra<'a>> + Clone {
349    ws1()
350        .ignore_then(kw("HAVING"))
351        .ignore_then(ws1())
352        .ignore_then(expr())
353}
354
355/// Parse PIVOT BY clause (pivot table transformation).
356fn pivot_by_clause<'a>() -> impl Parser<'a, ParserInput<'a>, Vec<Expr>, ParserExtra<'a>> + Clone {
357    ws1()
358        .ignore_then(kw("PIVOT"))
359        .ignore_then(ws1())
360        .ignore_then(kw("BY"))
361        .ignore_then(ws1())
362        .ignore_then(
363            expr()
364                .separated_by(ws().then(just(',')).then(ws()))
365                .at_least(1)
366                .collect(),
367        )
368}
369
370/// Parse ORDER BY clause.
371fn order_by_clause<'a>() -> impl Parser<'a, ParserInput<'a>, Vec<OrderSpec>, ParserExtra<'a>> + Clone
372{
373    ws1()
374        .ignore_then(kw("ORDER"))
375        .ignore_then(ws1())
376        .ignore_then(kw("BY"))
377        .ignore_then(ws1())
378        .ignore_then(
379            order_spec()
380                .separated_by(ws().then(just(',')).then(ws()))
381                .at_least(1)
382                .collect(),
383        )
384}
385
386/// Parse a single ORDER BY spec.
387fn order_spec<'a>() -> impl Parser<'a, ParserInput<'a>, OrderSpec, ParserExtra<'a>> + Clone {
388    expr()
389        .then(
390            ws1()
391                .ignore_then(choice((
392                    kw("ASC").to(SortDirection::Asc),
393                    kw("DESC").to(SortDirection::Desc),
394                )))
395                .or_not(),
396        )
397        .map(|(expr, dir)| OrderSpec {
398            expr,
399            direction: dir.unwrap_or_default(),
400        })
401}
402
403/// Parse LIMIT clause.
404fn limit_clause<'a>() -> impl Parser<'a, ParserInput<'a>, u64, ParserExtra<'a>> + Clone {
405    ws1()
406        .ignore_then(kw("LIMIT"))
407        .ignore_then(ws1())
408        .ignore_then(integer())
409        .map(|n| n as u64)
410}
411
412/// Parse JOURNAL query.
413fn journal_query<'a>() -> impl Parser<'a, ParserInput<'a>, JournalQuery, ParserExtra<'a>> + Clone {
414    kw("JOURNAL")
415        .ignore_then(
416            // Account pattern is optional - can be JOURNAL or JOURNAL "pattern"
417            ws1().ignore_then(string_literal()).or_not(),
418        )
419        .then(at_function().or_not())
420        .then(
421            ws1()
422                .ignore_then(kw("FROM"))
423                .ignore_then(ws1())
424                .ignore_then(from_modifiers())
425                .or_not(),
426        )
427        .map(|((account_pattern, at_function), from)| JournalQuery {
428            account_pattern: account_pattern.unwrap_or_default(),
429            at_function,
430            from,
431        })
432}
433
434/// Parse BALANCES query.
435fn balances_query<'a>() -> impl Parser<'a, ParserInput<'a>, BalancesQuery, ParserExtra<'a>> + Clone
436{
437    // Use rewind-based lookahead so optional clauses don't consume whitespace
438    // that subsequent clauses need. Without this, `BALANCES WHERE ...` fails
439    // because at_function() consumes whitespace before failing on "WHERE" != "AT".
440    let at_fn = ws1().then(kw("AT")).rewind().ignore_then(at_function());
441
442    let from = ws1().then(kw("FROM")).rewind().ignore_then(
443        ws1()
444            .ignore_then(kw("FROM"))
445            .ignore_then(ws1())
446            .ignore_then(from_modifiers()),
447    );
448
449    kw("BALANCES")
450        .ignore_then(at_fn.or_not())
451        .then(from.or_not())
452        .then(where_clause().or_not())
453        .map(|((at_function, from), where_clause)| BalancesQuery {
454            at_function,
455            from,
456            where_clause,
457        })
458}
459
460/// Parse PRINT query.
461fn print_query<'a>() -> impl Parser<'a, ParserInput<'a>, PrintQuery, ParserExtra<'a>> + Clone {
462    kw("PRINT")
463        .ignore_then(
464            ws1()
465                .ignore_then(kw("FROM"))
466                .ignore_then(ws1())
467                .ignore_then(from_modifiers())
468                .or_not(),
469        )
470        .map(|from| PrintQuery { from })
471}
472
473/// Parse CREATE TABLE statement.
474fn create_table_stmt<'a>() -> impl Parser<'a, ParserInput<'a>, CreateTableStmt, ParserExtra<'a>> {
475    // CREATE TABLE name (col1, col2, ...) or CREATE TABLE name AS SELECT ...
476    let column_def = identifier()
477        .then(ws().ignore_then(identifier()).or_not())
478        .map(|(name, type_hint)| ColumnDef { name, type_hint });
479
480    let column_list = just('(')
481        .ignore_then(ws())
482        .ignore_then(
483            column_def
484                .separated_by(ws().ignore_then(just(',')).then_ignore(ws()))
485                .collect::<Vec<_>>(),
486        )
487        .then_ignore(ws())
488        .then_ignore(just(')'));
489
490    let as_select = ws1()
491        .ignore_then(kw("AS"))
492        .ignore_then(ws1())
493        .ignore_then(select_query())
494        .map(Box::new);
495
496    kw("CREATE")
497        .ignore_then(ws1())
498        .ignore_then(kw("TABLE"))
499        .ignore_then(ws1())
500        .ignore_then(identifier())
501        .then(ws().ignore_then(column_list).or_not())
502        .then(as_select.or_not())
503        .map(|((table_name, columns), as_select)| CreateTableStmt {
504            table_name,
505            columns: columns.unwrap_or_default(),
506            as_select,
507        })
508}
509
510/// Parse INSERT statement.
511fn insert_stmt<'a>() -> impl Parser<'a, ParserInput<'a>, InsertStmt, ParserExtra<'a>> {
512    // Column list: (col1, col2, ...)
513    let column_list = just('(')
514        .ignore_then(ws())
515        .ignore_then(
516            identifier()
517                .separated_by(ws().ignore_then(just(',')).then_ignore(ws()))
518                .collect::<Vec<_>>(),
519        )
520        .then_ignore(ws())
521        .then_ignore(just(')'));
522
523    // VALUES clause: VALUES (v1, v2), (v3, v4), ...
524    let value_row = just('(')
525        .ignore_then(ws())
526        .ignore_then(
527            expr()
528                .separated_by(ws().ignore_then(just(',')).then_ignore(ws()))
529                .collect::<Vec<_>>(),
530        )
531        .then_ignore(ws())
532        .then_ignore(just(')'));
533
534    let values_source = kw("VALUES")
535        .ignore_then(ws())
536        .ignore_then(
537            value_row
538                .separated_by(ws().ignore_then(just(',')).then_ignore(ws()))
539                .collect::<Vec<_>>(),
540        )
541        .map(InsertSource::Values);
542
543    // SELECT as source
544    let select_source = select_query().map(|sq| InsertSource::Select(Box::new(sq)));
545
546    let source = choice((values_source, select_source));
547
548    kw("INSERT")
549        .ignore_then(ws1())
550        .ignore_then(kw("INTO"))
551        .ignore_then(ws1())
552        .ignore_then(identifier())
553        .then(ws().ignore_then(column_list).or_not())
554        .then_ignore(ws())
555        .then(source)
556        .map(|((table_name, columns), source)| InsertStmt {
557            table_name,
558            columns,
559            source,
560        })
561}
562
563/// Parse AT function (e.g., AT cost, AT units).
564fn at_function<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserExtra<'a>> + Clone {
565    ws1()
566        .ignore_then(kw("AT"))
567        .ignore_then(ws1())
568        .ignore_then(identifier())
569}
570
571/// Parse an expression (with precedence climbing).
572#[allow(clippy::large_stack_frames)]
573fn expr<'a>() -> Boxed<'a, 'a, ParserInput<'a>, Expr, ParserExtra<'a>> {
574    recursive(|expr| {
575        let primary = primary_expr(expr.clone()).boxed();
576
577        // Unary minus
578        let unary = just('-')
579            .then_ignore(ws())
580            .or_not()
581            .then(primary)
582            .map(|(neg, e)| {
583                if neg.is_some() {
584                    Expr::unary(UnaryOperator::Neg, e)
585                } else {
586                    e
587                }
588            })
589            .boxed();
590
591        // Multiplicative: * / %
592        let multiplicative = unary
593            .clone()
594            .foldl(
595                ws().ignore_then(choice((
596                    just('*').to(BinaryOperator::Mul),
597                    just('/').to(BinaryOperator::Div),
598                    just('%').to(BinaryOperator::Mod),
599                )))
600                .then_ignore(ws())
601                .then(unary)
602                .repeated(),
603                |left, (op, right)| Expr::binary(left, op, right),
604            )
605            .boxed();
606
607        // Additive: + -
608        let additive = multiplicative
609            .clone()
610            .foldl(
611                ws().ignore_then(choice((
612                    just('+').to(BinaryOperator::Add),
613                    just('-').to(BinaryOperator::Sub),
614                )))
615                .then_ignore(ws())
616                .then(multiplicative)
617                .repeated(),
618                |left, (op, right)| Expr::binary(left, op, right),
619            )
620            .boxed();
621
622        // Comparison: = != < <= > >= ~ !~ IN NOT IN BETWEEN IS NULL
623        let comparison = additive
624            .clone()
625            .then(
626                choice((
627                    // BETWEEN ... AND
628                    ws1()
629                        .ignore_then(kw("BETWEEN"))
630                        .ignore_then(ws1())
631                        .ignore_then(additive.clone())
632                        .then_ignore(ws1())
633                        .then_ignore(kw("AND"))
634                        .then_ignore(ws1())
635                        .then(additive.clone())
636                        .map(|(low, high)| ComparisonSuffix::Between(low, high)),
637                    // NOT IN - try set literal first, then fall back to expression
638                    ws1()
639                        .ignore_then(kw("NOT"))
640                        .ignore_then(ws1())
641                        .ignore_then(kw("IN"))
642                        .ignore_then(ws())
643                        .ignore_then(choice((
644                            set_literal(expr.clone()),
645                            additive.clone(),
646                        )))
647                        .map(ComparisonSuffix::NotIn),
648                    // IN - try set literal first, then fall back to expression
649                    ws1()
650                        .ignore_then(kw("IN"))
651                        .ignore_then(ws())
652                        .ignore_then(choice((
653                            set_literal(expr.clone()),
654                            additive.clone(),
655                        )))
656                        .map(ComparisonSuffix::In),
657                    // Regular comparison operators
658                    ws()
659                        .ignore_then(comparison_op())
660                        .then_ignore(ws())
661                        .then(additive)
662                        .map(|(op, right)| ComparisonSuffix::Binary(op, right)),
663                ))
664                .or_not(),
665            )
666            .map(|(left, suffix)| match suffix {
667                Some(ComparisonSuffix::Between(low, high)) => Expr::between(left, low, high),
668                Some(ComparisonSuffix::Binary(op, right)) => Expr::binary(left, op, right),
669                Some(ComparisonSuffix::In(right)) => Expr::binary(left, BinaryOperator::In, right),
670                Some(ComparisonSuffix::NotIn(right)) => {
671                    Expr::binary(left, BinaryOperator::NotIn, right)
672                }
673                None => left,
674            })
675            // IS NULL / IS NOT NULL (postfix)
676            .then(
677                ws1()
678                    .ignore_then(kw("IS"))
679                    .ignore_then(ws1())
680                    .ignore_then(choice((
681                        kw("NOT")
682                            .ignore_then(ws1())
683                            .ignore_then(kw("NULL"))
684                            .to(UnaryOperator::IsNotNull),
685                        kw("NULL").to(UnaryOperator::IsNull),
686                    )))
687                    .or_not(),
688            )
689            .map(|(expr, is_null)| {
690                if let Some(op) = is_null {
691                    Expr::unary(op, expr)
692                } else {
693                    expr
694                }
695            })
696            .boxed();
697
698        // NOT
699        let not_expr = kw("NOT")
700            .ignore_then(ws1())
701            .repeated()
702            .collect::<Vec<_>>()
703            .then(comparison)
704            .map(|(nots, e)| {
705                nots.into_iter()
706                    .fold(e, |acc, ()| Expr::unary(UnaryOperator::Not, acc))
707            })
708            .boxed();
709
710        // AND
711        let and_expr = not_expr
712            .clone()
713            .foldl(
714                ws1()
715                    .ignore_then(kw("AND"))
716                    .ignore_then(ws1())
717                    .ignore_then(not_expr)
718                    .repeated(),
719                |left, right| Expr::binary(left, BinaryOperator::And, right),
720            )
721            .boxed();
722
723        // OR (lowest precedence)
724        and_expr.clone().foldl(
725            ws1()
726                .ignore_then(kw("OR"))
727                .ignore_then(ws1())
728                .ignore_then(and_expr)
729                .repeated(),
730            |left, right| Expr::binary(left, BinaryOperator::Or, right),
731        )
732    })
733    .boxed()
734}
735
736/// Parse comparison operators (excluding IN/NOT IN which are handled specially).
737fn comparison_op<'a>() -> impl Parser<'a, ParserInput<'a>, BinaryOperator, ParserExtra<'a>> + Clone
738{
739    choice((
740        // Multi-char operators first
741        just("!=").to(BinaryOperator::Ne),
742        just("!~").to(BinaryOperator::NotRegex),
743        just("<=").to(BinaryOperator::Le),
744        just(">=").to(BinaryOperator::Ge),
745        // Single-char operators
746        just('=').to(BinaryOperator::Eq),
747        just('<').to(BinaryOperator::Lt),
748        just('>').to(BinaryOperator::Gt),
749        just('~').to(BinaryOperator::Regex),
750    ))
751}
752
753/// Parse a set literal for IN operator, e.g., `('EUR', 'USD')`.
754///
755/// To distinguish from parenthesized expressions like `IN (tags)`, set literals
756/// require either:
757/// - Two or more comma-separated elements: `('EUR', 'USD')`
758/// - A single element with trailing comma: `('EUR',)`
759///
760/// This ensures `IN (tags)` is parsed as `IN <parenthesized-column>` rather than
761/// `IN <single-element-set>`.
762fn set_literal<'a>(
763    expr: impl Parser<'a, ParserInput<'a>, Expr, ParserExtra<'a>> + Clone + 'a,
764) -> impl Parser<'a, ParserInput<'a>, Expr, ParserExtra<'a>> + Clone {
765    just('(')
766        .ignore_then(ws())
767        .ignore_then(
768            // Parse first element
769            expr.clone()
770                .then(
771                    // Then require either:
772                    // - comma + more elements (with optional trailing comma)
773                    // - trailing comma (for single-element sets)
774                    ws().ignore_then(just(',')).ignore_then(ws()).ignore_then(
775                        expr.separated_by(ws().then(just(',')).then(ws()))
776                            .allow_trailing()
777                            .collect::<Vec<_>>(),
778                    ),
779                )
780                .map(|(first, rest)| {
781                    let mut elements = Vec::with_capacity(1 + rest.len());
782                    elements.push(first);
783                    elements.extend(rest);
784                    elements
785                }),
786        )
787        .then_ignore(ws())
788        .then_ignore(just(')'))
789        .map(Expr::Set)
790}
791
792/// Parse primary expressions.
793fn primary_expr<'a>(
794    expr: impl Parser<'a, ParserInput<'a>, Expr, ParserExtra<'a>> + Clone + 'a,
795) -> impl Parser<'a, ParserInput<'a>, Expr, ParserExtra<'a>> + Clone {
796    choice((
797        // Parenthesized expression
798        just('(')
799            .ignore_then(ws())
800            .ignore_then(expr.clone())
801            .then_ignore(ws())
802            .then_ignore(just(')'))
803            .map(|e| Expr::Paren(Box::new(e))),
804        // Function call or column reference (must come before wildcard check)
805        // Pass expr to allow nested function calls like units(sum(position))
806        function_call_or_column(expr),
807        // Literals
808        literal().map(Expr::Literal),
809        // Wildcard (fallback if nothing else matched)
810        just('*').to(Expr::Wildcard),
811    ))
812}
813
814/// Parse function call, window function, or column reference.
815fn function_call_or_column<'a>(
816    expr: impl Parser<'a, ParserInput<'a>, Expr, ParserExtra<'a>> + Clone + 'a,
817) -> impl Parser<'a, ParserInput<'a>, Expr, ParserExtra<'a>> + Clone {
818    identifier()
819        .then(
820            ws().ignore_then(just('('))
821                .ignore_then(ws())
822                .ignore_then(function_args(expr))
823                .then_ignore(ws())
824                .then_ignore(just(')'))
825                .or_not(),
826        )
827        .then(
828            // Optional OVER clause for window functions
829            ws1()
830                .ignore_then(kw("OVER"))
831                .ignore_then(ws())
832                .ignore_then(just('('))
833                .ignore_then(ws())
834                .ignore_then(window_spec())
835                .then_ignore(ws())
836                .then_ignore(just(')'))
837                .or_not(),
838        )
839        .map(|((name, args), over)| {
840            if let Some(args) = args {
841                if let Some(window_spec) = over {
842                    // Window function
843                    Expr::Window(WindowFunction {
844                        name,
845                        args,
846                        over: window_spec,
847                    })
848                } else {
849                    // Regular function
850                    Expr::Function(FunctionCall { name, args })
851                }
852            } else {
853                Expr::Column(name)
854            }
855        })
856}
857
858/// Parse window specification (PARTITION BY and ORDER BY).
859fn window_spec<'a>() -> impl Parser<'a, ParserInput<'a>, WindowSpec, ParserExtra<'a>> + Clone {
860    let partition_by = kw("PARTITION")
861        .ignore_then(ws1())
862        .ignore_then(kw("BY"))
863        .ignore_then(ws1())
864        .ignore_then(
865            simple_arg()
866                .separated_by(ws().then(just(',')).then(ws()))
867                .at_least(1)
868                .collect::<Vec<_>>(),
869        )
870        .then_ignore(ws());
871
872    let window_order_by = kw("ORDER")
873        .ignore_then(ws1())
874        .ignore_then(kw("BY"))
875        .ignore_then(ws1())
876        .ignore_then(
877            window_order_spec()
878                .separated_by(ws().then(just(',')).then(ws()))
879                .at_least(1)
880                .collect::<Vec<_>>(),
881        );
882
883    partition_by
884        .or_not()
885        .then(window_order_by.or_not())
886        .map(|(partition_by, order_by)| WindowSpec {
887            partition_by,
888            order_by,
889        })
890}
891
892/// Parse ORDER BY spec within window (simple version).
893fn window_order_spec<'a>() -> impl Parser<'a, ParserInput<'a>, OrderSpec, ParserExtra<'a>> + Clone {
894    simple_arg()
895        .then(
896            ws1()
897                .ignore_then(choice((
898                    kw("ASC").to(SortDirection::Asc),
899                    kw("DESC").to(SortDirection::Desc),
900                )))
901                .or_not(),
902        )
903        .map(|(expr, dir)| OrderSpec {
904            expr,
905            direction: dir.unwrap_or_default(),
906        })
907}
908
909/// Parse function arguments.
910fn function_args<'a>(
911    expr: impl Parser<'a, ParserInput<'a>, Expr, ParserExtra<'a>> + Clone + 'a,
912) -> impl Parser<'a, ParserInput<'a>, Vec<Expr>, ParserExtra<'a>> + Clone {
913    // Allow empty args or comma-separated full expressions
914    // This enables nested function calls like units(sum(position))
915    expr.separated_by(ws().then(just(',')).then(ws())).collect()
916}
917
918/// Parse a simple function argument (column, wildcard, or literal).
919fn simple_arg<'a>() -> impl Parser<'a, ParserInput<'a>, Expr, ParserExtra<'a>> + Clone {
920    choice((
921        just('*').to(Expr::Wildcard),
922        identifier().map(Expr::Column),
923        literal().map(Expr::Literal),
924    ))
925}
926
927/// Parse a literal.
928fn literal<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserExtra<'a>> + Clone {
929    choice((
930        // Keywords first
931        kw("TRUE").to(Literal::Boolean(true)),
932        kw("FALSE").to(Literal::Boolean(false)),
933        kw("NULL").to(Literal::Null),
934        // Date literal (must be before number to avoid parsing year as number)
935        date_literal().map(Literal::Date),
936        // Number — Integer if no decimal point, Number otherwise
937        number_literal(),
938        // String
939        string_literal().map(Literal::String),
940    ))
941}
942
943/// Parse an identifier (column name, function name).
944fn identifier<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserExtra<'a>> + Clone {
945    text::ident().map(|s: &str| s.to_string())
946}
947
948/// Parse a table identifier, which can be a regular identifier or a system table
949/// starting with `#` (e.g., `#prices`, `#entries`).
950fn table_identifier<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserExtra<'a>> + Clone {
951    choice((
952        // System table: #identifier (e.g., #prices)
953        just('#')
954            .ignore_then(text::ident())
955            .map(|s: &str| format!("#{s}")),
956        // Regular table identifier
957        text::ident().map(|s: &str| s.to_string()),
958    ))
959}
960
961/// Parse a string literal.
962fn string_literal<'a>() -> impl Parser<'a, ParserInput<'a>, String, ParserExtra<'a>> + Clone {
963    // Double-quoted string
964    let double_quoted = just('"')
965        .ignore_then(
966            none_of("\"\\")
967                .or(just('\\').ignore_then(any()))
968                .repeated()
969                .collect::<String>(),
970        )
971        .then_ignore(just('"'));
972
973    // Single-quoted string (SQL-style)
974    let single_quoted = just('\'')
975        .ignore_then(
976            none_of("'\\")
977                .or(just('\\').ignore_then(any()))
978                .repeated()
979                .collect::<String>(),
980        )
981        .then_ignore(just('\''));
982
983    choice((double_quoted, single_quoted))
984}
985
986/// Parse a date literal (YYYY-MM-DD).
987fn date_literal<'a>() -> impl Parser<'a, ParserInput<'a>, NaiveDate, ParserExtra<'a>> + Clone {
988    digits()
989        .then_ignore(just('-'))
990        .then(digits())
991        .then_ignore(just('-'))
992        .then(digits())
993        .try_map(|((year, month), day): ((&str, &str), &str), span| {
994            let year: i32 = year
995                .parse()
996                .map_err(|_| Rich::custom(span, "invalid year"))?;
997            let month: u32 = month
998                .parse()
999                .map_err(|_| Rich::custom(span, "invalid month"))?;
1000            let day: u32 = day.parse().map_err(|_| Rich::custom(span, "invalid day"))?;
1001            rustledger_core::naive_date(year, month, day)
1002                .ok_or_else(|| Rich::custom(span, "invalid date"))
1003        })
1004}
1005
1006/// Parse a numeric literal as either `Literal::Integer` (no fractional part) or
1007/// `Literal::Number` (has a fractional part, or whole-number value exceeds i64).
1008///
1009/// Distinguishing integer from decimal at the parser level matches BQL/SQL
1010/// semantics and lets functions that strictly require integer arguments
1011/// (e.g. `ROOT(account, n)`, `SUBSTR(s, start, len)`) work with literal
1012/// arguments. See issue #938.
1013fn number_literal<'a>() -> impl Parser<'a, ParserInput<'a>, Literal, ParserExtra<'a>> + Clone {
1014    just('-')
1015        .or_not()
1016        .then(digits())
1017        .then(just('.').then(digits()).or_not())
1018        .try_map(
1019            |((neg, int_part), frac_part): ((Option<char>, &str), Option<(char, &str)>), span| {
1020                let mut s = String::new();
1021                if neg.is_some() {
1022                    s.push('-');
1023                }
1024                s.push_str(int_part);
1025                match frac_part {
1026                    None => match s.parse::<i64>() {
1027                        Ok(i) => Ok(Literal::Integer(i)),
1028                        // Whole-number value out of i64 range — fall back to Decimal.
1029                        Err(_) => Decimal::from_str(&s)
1030                            .map(Literal::Number)
1031                            .map_err(|_| Rich::custom(span, "invalid number")),
1032                    },
1033                    Some((_, frac)) => {
1034                        s.push('.');
1035                        s.push_str(frac);
1036                        Decimal::from_str(&s)
1037                            .map(Literal::Number)
1038                            .map_err(|_| Rich::custom(span, "invalid number"))
1039                    }
1040                }
1041            },
1042        )
1043}
1044
1045/// Parse an integer.
1046fn integer<'a>() -> impl Parser<'a, ParserInput<'a>, i64, ParserExtra<'a>> + Clone {
1047    digits().try_map(|s: &str, span| {
1048        s.parse::<i64>()
1049            .map_err(|_| Rich::custom(span, "invalid integer"))
1050    })
1051}
1052
1053#[cfg(test)]
1054mod tests {
1055    use super::*;
1056    use rust_decimal_macros::dec;
1057
1058    #[test]
1059    fn test_simple_select() {
1060        let query = parse("SELECT * FROM year = 2024").unwrap();
1061        match query {
1062            Query::Select(sel) => {
1063                assert!(!sel.distinct);
1064                assert_eq!(sel.targets.len(), 1);
1065                assert!(matches!(sel.targets[0].expr, Expr::Wildcard));
1066                assert!(sel.from.is_some());
1067            }
1068            _ => panic!("Expected SELECT query"),
1069        }
1070    }
1071
1072    #[test]
1073    fn test_select_columns() {
1074        let query = parse("SELECT date, account, position").unwrap();
1075        match query {
1076            Query::Select(sel) => {
1077                assert_eq!(sel.targets.len(), 3);
1078                assert!(matches!(&sel.targets[0].expr, Expr::Column(c) if c == "date"));
1079                assert!(matches!(&sel.targets[1].expr, Expr::Column(c) if c == "account"));
1080                assert!(matches!(&sel.targets[2].expr, Expr::Column(c) if c == "position"));
1081            }
1082            _ => panic!("Expected SELECT query"),
1083        }
1084    }
1085
1086    #[test]
1087    fn test_select_with_alias() {
1088        let query = parse("SELECT SUM(position) AS total").unwrap();
1089        match query {
1090            Query::Select(sel) => {
1091                assert_eq!(sel.targets.len(), 1);
1092                assert_eq!(sel.targets[0].alias, Some("total".to_string()));
1093                match &sel.targets[0].expr {
1094                    Expr::Function(f) => {
1095                        assert_eq!(f.name, "SUM");
1096                        assert_eq!(f.args.len(), 1);
1097                    }
1098                    _ => panic!("Expected function"),
1099                }
1100            }
1101            _ => panic!("Expected SELECT query"),
1102        }
1103    }
1104
1105    #[test]
1106    fn test_select_distinct() {
1107        let query = parse("SELECT DISTINCT account").unwrap();
1108        match query {
1109            Query::Select(sel) => {
1110                assert!(sel.distinct);
1111            }
1112            _ => panic!("Expected SELECT query"),
1113        }
1114    }
1115
1116    #[test]
1117    fn test_select_distinct_no_space() {
1118        // Issue #640: DISTINCT(expr) without space should not be parsed as a function call
1119        let query = parse("SELECT DISTINCT(account) FROM postings").unwrap();
1120        match query {
1121            Query::Select(sel) => {
1122                assert!(sel.distinct);
1123            }
1124            _ => panic!("Expected SELECT query"),
1125        }
1126    }
1127
1128    #[test]
1129    fn test_select_distinct_coalesce_no_space() {
1130        // Issue #640: DISTINCT(COALESCE(payee, narration)) should work
1131        let query = parse("SELECT DISTINCT(COALESCE(payee, narration)) as payee FROM transactions")
1132            .unwrap();
1133        match query {
1134            Query::Select(sel) => {
1135                assert!(sel.distinct);
1136            }
1137            _ => panic!("Expected SELECT query"),
1138        }
1139    }
1140
1141    #[test]
1142    fn test_where_clause() {
1143        let query = parse("SELECT * WHERE account ~ \"Expenses:\"").unwrap();
1144        match query {
1145            Query::Select(sel) => {
1146                assert!(sel.where_clause.is_some());
1147                match sel.where_clause.unwrap() {
1148                    Expr::BinaryOp(op) => {
1149                        assert_eq!(op.op, BinaryOperator::Regex);
1150                    }
1151                    _ => panic!("Expected binary op"),
1152                }
1153            }
1154            _ => panic!("Expected SELECT query"),
1155        }
1156    }
1157
1158    #[test]
1159    fn test_group_by() {
1160        let query = parse("SELECT account, SUM(position) GROUP BY account").unwrap();
1161        match query {
1162            Query::Select(sel) => {
1163                assert!(sel.group_by.is_some());
1164                assert_eq!(sel.group_by.unwrap().len(), 1);
1165            }
1166            _ => panic!("Expected SELECT query"),
1167        }
1168    }
1169
1170    #[test]
1171    fn test_order_by() {
1172        let query = parse("SELECT * ORDER BY date DESC, account ASC").unwrap();
1173        match query {
1174            Query::Select(sel) => {
1175                assert!(sel.order_by.is_some());
1176                let order = sel.order_by.unwrap();
1177                assert_eq!(order.len(), 2);
1178                assert_eq!(order[0].direction, SortDirection::Desc);
1179                assert_eq!(order[1].direction, SortDirection::Asc);
1180            }
1181            _ => panic!("Expected SELECT query"),
1182        }
1183    }
1184
1185    #[test]
1186    fn test_limit() {
1187        let query = parse("SELECT * LIMIT 100").unwrap();
1188        match query {
1189            Query::Select(sel) => {
1190                assert_eq!(sel.limit, Some(100));
1191            }
1192            _ => panic!("Expected SELECT query"),
1193        }
1194    }
1195
1196    #[test]
1197    fn test_from_open_close_clear() {
1198        let query = parse("SELECT * FROM OPEN ON 2024-01-01 CLOSE ON 2024-12-31 CLEAR").unwrap();
1199        match query {
1200            Query::Select(sel) => {
1201                let from = sel.from.unwrap();
1202                assert_eq!(
1203                    from.open_on,
1204                    Some(rustledger_core::naive_date(2024, 1, 1).unwrap())
1205                );
1206                assert_eq!(
1207                    from.close_on,
1208                    Some(rustledger_core::naive_date(2024, 12, 31).unwrap())
1209                );
1210                assert!(from.clear);
1211            }
1212            _ => panic!("Expected SELECT query"),
1213        }
1214    }
1215
1216    #[test]
1217    fn test_from_year_filter() {
1218        let query = parse("SELECT date, account FROM year = 2024").unwrap();
1219        match query {
1220            Query::Select(sel) => {
1221                let from = sel.from.unwrap();
1222                assert!(from.filter.is_some(), "FROM filter should be present");
1223                match from.filter.unwrap() {
1224                    Expr::BinaryOp(op) => {
1225                        assert_eq!(op.op, BinaryOperator::Eq);
1226                        assert!(matches!(op.left, Expr::Column(ref c) if c == "year"));
1227                        // Right side can be Integer or Number (parser produces Number)
1228                        match op.right {
1229                            Expr::Literal(Literal::Integer(n)) => assert_eq!(n, 2024),
1230                            Expr::Literal(Literal::Number(n)) => assert_eq!(n, dec!(2024)),
1231                            other => panic!("Expected numeric literal, got {other:?}"),
1232                        }
1233                    }
1234                    other => panic!("Expected BinaryOp, got {other:?}"),
1235                }
1236            }
1237            _ => panic!("Expected SELECT query"),
1238        }
1239    }
1240
1241    #[test]
1242    fn test_journal_query() {
1243        let query = parse("JOURNAL \"Assets:Bank\" AT cost").unwrap();
1244        match query {
1245            Query::Journal(j) => {
1246                assert_eq!(j.account_pattern, "Assets:Bank");
1247                assert_eq!(j.at_function, Some("cost".to_string()));
1248            }
1249            _ => panic!("Expected JOURNAL query"),
1250        }
1251    }
1252
1253    #[test]
1254    fn test_balances_query() {
1255        let query = parse("BALANCES AT units FROM year = 2024").unwrap();
1256        match query {
1257            Query::Balances(b) => {
1258                assert_eq!(b.at_function, Some("units".to_string()));
1259                assert!(b.from.is_some());
1260            }
1261            _ => panic!("Expected BALANCES query"),
1262        }
1263    }
1264
1265    #[test]
1266    fn test_print_query() {
1267        let query = parse("PRINT").unwrap();
1268        assert!(matches!(query, Query::Print(_)));
1269    }
1270
1271    #[test]
1272    fn test_complex_expression() {
1273        let query = parse("SELECT * WHERE date >= 2024-01-01 AND account ~ \"Expenses:\"").unwrap();
1274        match query {
1275            Query::Select(sel) => match sel.where_clause.unwrap() {
1276                Expr::BinaryOp(op) => {
1277                    assert_eq!(op.op, BinaryOperator::And);
1278                }
1279                _ => panic!("Expected AND"),
1280            },
1281            _ => panic!("Expected SELECT query"),
1282        }
1283    }
1284
1285    #[test]
1286    fn test_integer_literal_parsing() {
1287        let query = parse("SELECT * WHERE year = 2024").unwrap();
1288        match query {
1289            Query::Select(sel) => match sel.where_clause.unwrap() {
1290                Expr::BinaryOp(op) => match op.right {
1291                    Expr::Literal(Literal::Integer(n)) => {
1292                        assert_eq!(n, 2024);
1293                    }
1294                    _ => panic!("Expected integer literal"),
1295                },
1296                _ => panic!("Expected binary op"),
1297            },
1298            _ => panic!("Expected SELECT query"),
1299        }
1300    }
1301
1302    #[test]
1303    fn test_integer_vs_decimal_literal() {
1304        // Whole-number literal → Integer
1305        let q = parse("SELECT * WHERE x = 42").unwrap();
1306        let Query::Select(sel) = q else {
1307            panic!("expected SELECT");
1308        };
1309        let Expr::BinaryOp(op) = sel.where_clause.unwrap() else {
1310            panic!("expected binary op");
1311        };
1312        assert!(matches!(op.right, Expr::Literal(Literal::Integer(42))));
1313
1314        // Literal with decimal point → Number, even when whole-valued
1315        let q = parse("SELECT * WHERE x = 42.0").unwrap();
1316        let Query::Select(sel) = q else {
1317            panic!("expected SELECT");
1318        };
1319        let Expr::BinaryOp(op) = sel.where_clause.unwrap() else {
1320            panic!("expected binary op");
1321        };
1322        match op.right {
1323            Expr::Literal(Literal::Number(n)) => assert_eq!(n, dec!(42.0)),
1324            other => panic!("expected Number literal, got {other:?}"),
1325        }
1326    }
1327
1328    #[test]
1329    fn test_integer_overflow_falls_back_to_number() {
1330        // i64::MAX is 9_223_372_036_854_775_807 — this exceeds it
1331        let q = parse("SELECT * WHERE x = 99999999999999999999").unwrap();
1332        let Query::Select(sel) = q else {
1333            panic!("expected SELECT");
1334        };
1335        let Expr::BinaryOp(op) = sel.where_clause.unwrap() else {
1336            panic!("expected binary op");
1337        };
1338        assert!(matches!(op.right, Expr::Literal(Literal::Number(_))));
1339    }
1340
1341    #[test]
1342    fn test_negative_integer_literal() {
1343        // Negative integer literals: the expression-level unary-minus rule
1344        // strips the `-` before the literal parser runs, so `-42` becomes
1345        // `Unary(Neg, Integer(42))` rather than `Integer(-42)`. Both forms
1346        // evaluate to `Value::Integer(-42)`.
1347        let q = parse("SELECT * WHERE x = -42").unwrap();
1348        let Query::Select(sel) = q else {
1349            panic!("expected SELECT");
1350        };
1351        let Expr::BinaryOp(op) = sel.where_clause.unwrap() else {
1352            panic!("expected binary op");
1353        };
1354        match op.right {
1355            Expr::UnaryOp(unary) => {
1356                assert_eq!(unary.op, UnaryOperator::Neg);
1357                assert!(matches!(unary.operand, Expr::Literal(Literal::Integer(42))));
1358            }
1359            other => panic!("expected Unary(Neg, Integer(42)), got {other:?}"),
1360        }
1361    }
1362
1363    #[test]
1364    fn test_semicolon_optional() {
1365        assert!(parse("SELECT *").is_ok());
1366        assert!(parse("SELECT *;").is_ok());
1367    }
1368
1369    #[test]
1370    fn test_subquery_basic() {
1371        let query = parse("SELECT * FROM (SELECT account, position)").unwrap();
1372        match query {
1373            Query::Select(sel) => {
1374                assert!(sel.from.is_some());
1375                let from = sel.from.unwrap();
1376                assert!(from.subquery.is_some());
1377                let subquery = from.subquery.unwrap();
1378                assert_eq!(subquery.targets.len(), 2);
1379            }
1380            _ => panic!("Expected SELECT query"),
1381        }
1382    }
1383
1384    #[test]
1385    fn test_subquery_with_groupby() {
1386        let query = parse(
1387            "SELECT account, total FROM (SELECT account, SUM(position) AS total GROUP BY account)",
1388        )
1389        .unwrap();
1390        match query {
1391            Query::Select(sel) => {
1392                assert_eq!(sel.targets.len(), 2);
1393                let from = sel.from.unwrap();
1394                assert!(from.subquery.is_some());
1395                let subquery = from.subquery.unwrap();
1396                assert!(subquery.group_by.is_some());
1397            }
1398            _ => panic!("Expected SELECT query"),
1399        }
1400    }
1401
1402    #[test]
1403    fn test_subquery_with_outer_where() {
1404        let query =
1405            parse("SELECT * FROM (SELECT * WHERE year = 2024) WHERE account ~ \"Expenses:\"")
1406                .unwrap();
1407        match query {
1408            Query::Select(sel) => {
1409                // Outer WHERE
1410                assert!(sel.where_clause.is_some());
1411                // Subquery with its own WHERE
1412                let from = sel.from.unwrap();
1413                let subquery = from.subquery.unwrap();
1414                assert!(subquery.where_clause.is_some());
1415            }
1416            _ => panic!("Expected SELECT query"),
1417        }
1418    }
1419
1420    #[test]
1421    fn test_nested_subquery() {
1422        // Two levels of nesting
1423        let query = parse("SELECT * FROM (SELECT * FROM (SELECT account))").unwrap();
1424        match query {
1425            Query::Select(sel) => {
1426                let from = sel.from.unwrap();
1427                let subquery1 = from.subquery.unwrap();
1428                let from2 = subquery1.from.unwrap();
1429                assert!(from2.subquery.is_some());
1430            }
1431            _ => panic!("Expected SELECT query"),
1432        }
1433    }
1434
1435    #[test]
1436    fn test_nested_function_calls() {
1437        // Test units(sum(position)) pattern
1438        let query = parse("SELECT units(sum(position))").unwrap();
1439        match query {
1440            Query::Select(sel) => {
1441                assert_eq!(sel.targets.len(), 1);
1442                match &sel.targets[0].expr {
1443                    Expr::Function(outer) => {
1444                        assert_eq!(outer.name, "units");
1445                        assert_eq!(outer.args.len(), 1);
1446                        match &outer.args[0] {
1447                            Expr::Function(inner) => {
1448                                assert_eq!(inner.name, "sum");
1449                                assert_eq!(inner.args.len(), 1);
1450                                assert!(
1451                                    matches!(&inner.args[0], Expr::Column(c) if c == "position")
1452                                );
1453                            }
1454                            _ => panic!("Expected inner function call"),
1455                        }
1456                    }
1457                    _ => panic!("Expected outer function call"),
1458                }
1459            }
1460            _ => panic!("Expected SELECT query"),
1461        }
1462    }
1463
1464    #[test]
1465    fn test_deeply_nested_function_calls() {
1466        // Test three levels of nesting
1467        let query = parse("SELECT foo(bar(baz(x)))").unwrap();
1468        match query {
1469            Query::Select(sel) => {
1470                assert_eq!(sel.targets.len(), 1);
1471                match &sel.targets[0].expr {
1472                    Expr::Function(f1) => {
1473                        assert_eq!(f1.name, "foo");
1474                        match &f1.args[0] {
1475                            Expr::Function(f2) => {
1476                                assert_eq!(f2.name, "bar");
1477                                match &f2.args[0] {
1478                                    Expr::Function(f3) => {
1479                                        assert_eq!(f3.name, "baz");
1480                                        assert!(matches!(&f3.args[0], Expr::Column(c) if c == "x"));
1481                                    }
1482                                    _ => panic!("Expected f3"),
1483                                }
1484                            }
1485                            _ => panic!("Expected f2"),
1486                        }
1487                    }
1488                    _ => panic!("Expected f1"),
1489                }
1490            }
1491            _ => panic!("Expected SELECT query"),
1492        }
1493    }
1494
1495    #[test]
1496    fn test_function_with_arithmetic() {
1497        // Test function with arithmetic expression as argument
1498        let query = parse("SELECT sum(amount * 2)").unwrap();
1499        match query {
1500            Query::Select(sel) => match &sel.targets[0].expr {
1501                Expr::Function(f) => {
1502                    assert_eq!(f.name, "sum");
1503                    assert!(matches!(&f.args[0], Expr::BinaryOp(_)));
1504                }
1505                _ => panic!("Expected function"),
1506            },
1507            _ => panic!("Expected SELECT query"),
1508        }
1509    }
1510
1511    #[test]
1512    fn test_is_null() {
1513        let query = parse("SELECT * WHERE payee IS NULL").unwrap();
1514        match query {
1515            Query::Select(sel) => match sel.where_clause.unwrap() {
1516                Expr::UnaryOp(op) => {
1517                    assert_eq!(op.op, UnaryOperator::IsNull);
1518                    assert!(matches!(&op.operand, Expr::Column(c) if c == "payee"));
1519                }
1520                _ => panic!("Expected unary op"),
1521            },
1522            _ => panic!("Expected SELECT query"),
1523        }
1524    }
1525
1526    #[test]
1527    fn test_is_not_null() {
1528        let query = parse("SELECT * WHERE payee IS NOT NULL").unwrap();
1529        match query {
1530            Query::Select(sel) => match sel.where_clause.unwrap() {
1531                Expr::UnaryOp(op) => {
1532                    assert_eq!(op.op, UnaryOperator::IsNotNull);
1533                    assert!(matches!(&op.operand, Expr::Column(c) if c == "payee"));
1534                }
1535                _ => panic!("Expected unary op"),
1536            },
1537            _ => panic!("Expected SELECT query"),
1538        }
1539    }
1540
1541    #[test]
1542    fn test_not_regex() {
1543        let query = parse("SELECT * WHERE account !~ \"Assets:\"").unwrap();
1544        match query {
1545            Query::Select(sel) => match sel.where_clause.unwrap() {
1546                Expr::BinaryOp(op) => {
1547                    assert_eq!(op.op, BinaryOperator::NotRegex);
1548                }
1549                _ => panic!("Expected binary op"),
1550            },
1551            _ => panic!("Expected SELECT query"),
1552        }
1553    }
1554
1555    #[test]
1556    fn test_modulo() {
1557        let query = parse("SELECT year % 4").unwrap();
1558        match query {
1559            Query::Select(sel) => match &sel.targets[0].expr {
1560                Expr::BinaryOp(op) => {
1561                    assert_eq!(op.op, BinaryOperator::Mod);
1562                }
1563                _ => panic!("Expected binary op"),
1564            },
1565            _ => panic!("Expected SELECT query"),
1566        }
1567    }
1568
1569    #[test]
1570    fn test_between() {
1571        let query = parse("SELECT * WHERE year BETWEEN 2020 AND 2024").unwrap();
1572        match query {
1573            Query::Select(sel) => match sel.where_clause.unwrap() {
1574                Expr::Between { value, low, high } => {
1575                    assert!(matches!(*value, Expr::Column(c) if c == "year"));
1576                    assert!(matches!(*low, Expr::Literal(Literal::Integer(_))));
1577                    assert!(matches!(*high, Expr::Literal(Literal::Integer(_))));
1578                }
1579                _ => panic!("Expected BETWEEN"),
1580            },
1581            _ => panic!("Expected SELECT query"),
1582        }
1583    }
1584
1585    #[test]
1586    fn test_not_in() {
1587        let query = parse("SELECT * WHERE account NOT IN tags").unwrap();
1588        match query {
1589            Query::Select(sel) => match sel.where_clause.unwrap() {
1590                Expr::BinaryOp(op) => {
1591                    assert_eq!(op.op, BinaryOperator::NotIn);
1592                }
1593                _ => panic!("Expected binary op"),
1594            },
1595            _ => panic!("Expected SELECT query"),
1596        }
1597    }
1598
1599    #[test]
1600    fn test_in_set_literal() {
1601        // Multi-element set literal
1602        let query = parse("SELECT * WHERE currency IN ('EUR', 'USD')").unwrap();
1603        match query {
1604            Query::Select(sel) => match sel.where_clause.unwrap() {
1605                Expr::BinaryOp(op) => {
1606                    assert_eq!(op.op, BinaryOperator::In);
1607                    match op.right {
1608                        Expr::Set(elements) => {
1609                            assert_eq!(elements.len(), 2);
1610                        }
1611                        _ => panic!("Expected Set"),
1612                    }
1613                }
1614                _ => panic!("Expected binary op"),
1615            },
1616            _ => panic!("Expected SELECT query"),
1617        }
1618
1619        // Single-element set with trailing comma
1620        let query = parse("SELECT * WHERE currency IN ('EUR',)").unwrap();
1621        match query {
1622            Query::Select(sel) => match sel.where_clause.unwrap() {
1623                Expr::BinaryOp(op) => {
1624                    assert_eq!(op.op, BinaryOperator::In);
1625                    match op.right {
1626                        Expr::Set(elements) => {
1627                            assert_eq!(elements.len(), 1);
1628                        }
1629                        _ => panic!("Expected Set"),
1630                    }
1631                }
1632                _ => panic!("Expected binary op"),
1633            },
1634            _ => panic!("Expected SELECT query"),
1635        }
1636
1637        // Parenthesized column (not a set literal)
1638        let query = parse("SELECT * WHERE 'x' IN (tags)").unwrap();
1639        match query {
1640            Query::Select(sel) => match sel.where_clause.unwrap() {
1641                Expr::BinaryOp(op) => {
1642                    assert_eq!(op.op, BinaryOperator::In);
1643                    // Should be Paren(Column), not Set([Column])
1644                    match op.right {
1645                        Expr::Paren(inner) => match *inner {
1646                            Expr::Column(name) => assert_eq!(name, "tags"),
1647                            _ => panic!("Expected Column inside Paren"),
1648                        },
1649                        other => panic!("Expected Paren, got {other:?}"),
1650                    }
1651                }
1652                _ => panic!("Expected binary op"),
1653            },
1654            _ => panic!("Expected SELECT query"),
1655        }
1656    }
1657
1658    #[test]
1659    fn test_string_arg_function() {
1660        // First test a function with a column reference - should work
1661        let query = parse("SELECT foo(x)").unwrap();
1662        match query {
1663            Query::Select(sel) => match &sel.targets[0].expr {
1664                Expr::Function(f) => {
1665                    assert_eq!(f.name, "foo");
1666                }
1667                _ => panic!("Expected function"),
1668            },
1669            _ => panic!("Expected SELECT query"),
1670        }
1671
1672        // Now test a function with a string literal argument
1673        let query = parse("SELECT foo('bar')").unwrap();
1674        match query {
1675            Query::Select(sel) => match &sel.targets[0].expr {
1676                Expr::Function(f) => {
1677                    assert_eq!(f.name, "foo");
1678                    assert!(matches!(&f.args[0], Expr::Literal(Literal::String(s)) if s == "bar"));
1679                }
1680                _ => panic!("Expected function"),
1681            },
1682            _ => panic!("Expected SELECT query"),
1683        }
1684    }
1685
1686    #[test]
1687    fn test_meta_function() {
1688        let query = parse("SELECT meta('category')").unwrap();
1689        match query {
1690            Query::Select(sel) => match &sel.targets[0].expr {
1691                Expr::Function(f) => {
1692                    assert_eq!(f.name.to_uppercase(), "META");
1693                    assert_eq!(f.args.len(), 1);
1694                    assert!(
1695                        matches!(&f.args[0], Expr::Literal(Literal::String(s)) if s == "category")
1696                    );
1697                }
1698                _ => panic!("Expected function"),
1699            },
1700            _ => panic!("Expected SELECT query"),
1701        }
1702    }
1703
1704    #[test]
1705    fn test_entry_meta_function() {
1706        let query = parse("SELECT entry_meta('source')").unwrap();
1707        match query {
1708            Query::Select(sel) => match &sel.targets[0].expr {
1709                Expr::Function(f) => {
1710                    assert_eq!(f.name.to_uppercase(), "ENTRY_META");
1711                    assert_eq!(f.args.len(), 1);
1712                }
1713                _ => panic!("Expected function"),
1714            },
1715            _ => panic!("Expected SELECT query"),
1716        }
1717    }
1718
1719    #[test]
1720    fn test_convert_function() {
1721        let query = parse("SELECT convert(position, 'USD')").unwrap();
1722        match query {
1723            Query::Select(sel) => match &sel.targets[0].expr {
1724                Expr::Function(f) => {
1725                    assert_eq!(f.name.to_uppercase(), "CONVERT");
1726                    assert_eq!(f.args.len(), 2);
1727                }
1728                _ => panic!("Expected function"),
1729            },
1730            _ => panic!("Expected SELECT query"),
1731        }
1732    }
1733
1734    #[test]
1735    fn test_type_cast_functions() {
1736        // Test INT
1737        let query = parse("SELECT int(number)").unwrap();
1738        match query {
1739            Query::Select(sel) => match &sel.targets[0].expr {
1740                Expr::Function(f) => {
1741                    assert_eq!(f.name.to_uppercase(), "INT");
1742                    assert_eq!(f.args.len(), 1);
1743                }
1744                _ => panic!("Expected function"),
1745            },
1746            _ => panic!("Expected SELECT query"),
1747        }
1748
1749        // Test DECIMAL
1750        let query = parse("SELECT decimal('123.45')").unwrap();
1751        match query {
1752            Query::Select(sel) => match &sel.targets[0].expr {
1753                Expr::Function(f) => {
1754                    assert_eq!(f.name.to_uppercase(), "DECIMAL");
1755                }
1756                _ => panic!("Expected function"),
1757            },
1758            _ => panic!("Expected SELECT query"),
1759        }
1760
1761        // Test STR
1762        let query = parse("SELECT str(123)").unwrap();
1763        match query {
1764            Query::Select(sel) => match &sel.targets[0].expr {
1765                Expr::Function(f) => {
1766                    assert_eq!(f.name.to_uppercase(), "STR");
1767                }
1768                _ => panic!("Expected function"),
1769            },
1770            _ => panic!("Expected SELECT query"),
1771        }
1772
1773        // Test BOOL
1774        let query = parse("SELECT bool(1)").unwrap();
1775        match query {
1776            Query::Select(sel) => match &sel.targets[0].expr {
1777                Expr::Function(f) => {
1778                    assert_eq!(f.name.to_uppercase(), "BOOL");
1779                }
1780                _ => panic!("Expected function"),
1781            },
1782            _ => panic!("Expected SELECT query"),
1783        }
1784    }
1785
1786    #[test]
1787    fn test_system_table_prices() {
1788        // Test parsing SELECT FROM #prices (system table)
1789        let query = parse("SELECT date, currency, amount FROM #prices").unwrap();
1790        match query {
1791            Query::Select(sel) => {
1792                assert_eq!(sel.targets.len(), 3);
1793                assert!(matches!(&sel.targets[0].expr, Expr::Column(c) if c == "date"));
1794                assert!(matches!(&sel.targets[1].expr, Expr::Column(c) if c == "currency"));
1795                assert!(matches!(&sel.targets[2].expr, Expr::Column(c) if c == "amount"));
1796                let from = sel.from.unwrap();
1797                assert_eq!(from.table_name, Some("#prices".to_string()));
1798            }
1799            _ => panic!("Expected SELECT query"),
1800        }
1801    }
1802
1803    #[test]
1804    fn test_system_table_with_where() {
1805        // Test parsing system table with WHERE clause
1806        let query = parse("SELECT * FROM #prices WHERE currency = 'EUR'").unwrap();
1807        match query {
1808            Query::Select(sel) => {
1809                let from = sel.from.unwrap();
1810                assert_eq!(from.table_name, Some("#prices".to_string()));
1811                assert!(sel.where_clause.is_some());
1812            }
1813            _ => panic!("Expected SELECT query"),
1814        }
1815    }
1816
1817    #[test]
1818    fn test_regular_table_identifier() {
1819        // Test parsing a regular (non-system) table
1820        let query = parse("SELECT * FROM MyTable WHERE x = 1").unwrap();
1821        match query {
1822            Query::Select(sel) => {
1823                let from = sel.from.unwrap();
1824                assert_eq!(from.table_name, Some("MyTable".to_string()));
1825            }
1826            _ => panic!("Expected SELECT query"),
1827        }
1828    }
1829
1830    /// Pathologically deep paren nesting must fail fast with a clean
1831    /// error rather than overflowing the stack or parsing in
1832    /// super-linear time (query-fuzzer slow-unit denial-of-service).
1833    #[test]
1834    fn deeply_nested_parens_rejected_without_stack_overflow() {
1835        let src = format!("SELECT {}", "(".repeat(2000));
1836        let err = parse(&src).expect_err("deeply nested parens should be rejected");
1837        assert!(
1838            matches!(err.kind, ParseErrorKind::SyntaxError(ref m) if m.contains("nesting too deep")),
1839            "expected a nesting-depth error, got: {:?}",
1840            err.kind
1841        );
1842    }
1843
1844    /// The guard must not reject legitimately nested queries: nesting up
1845    /// to the limit still parses.
1846    #[test]
1847    fn moderate_nesting_still_parses() {
1848        // 100 levels of real function nesting (well under the 128 cap).
1849        let depth = 100usize;
1850        let inner = "x".to_string();
1851        let body = format!("{}{}{}", "abs(".repeat(depth), inner, ")".repeat(depth));
1852        let src = format!("SELECT {body}");
1853        assert!(
1854            parse(&src).is_ok(),
1855            "a {depth}-deep nested query within the limit should parse"
1856        );
1857    }
1858
1859    /// Parens inside string literals are opaque and must not count
1860    /// toward the nesting limit.
1861    #[test]
1862    fn parens_inside_string_literal_dont_count() {
1863        let src = format!("SELECT account WHERE narration = \"{}\"", "(".repeat(2000));
1864        // Assert directly on the guard: 2000 parens inside a string
1865        // literal must not register as nesting depth at all.
1866        assert!(
1867            nesting_exceeds_limit(&src).is_none(),
1868            "parens inside a string literal must not count toward the nesting limit"
1869        );
1870        // And the query as a whole still parses (the string is opaque).
1871        assert!(parse(&src).is_ok(), "string-literal query should parse");
1872    }
1873}