qail_core/
parser.rs

1//! QAIL Parser using nom.
2//!
3//! Parses QAIL syntax into an AST.
4//!
5//! # Syntax Overview
6//!
7//! ```text
8//! get::users•@id@email[active=true][lim=10]
9//! ─┬─ ─┬─  ┬ ─────┬───── ─────┬──────────
10//!  │   │   │      │           │
11//!  │   │   │      │           └── Cages (filters, limits)
12//!  │   │   │      └── Hooks (columns)
13//!  │   │   └── Pivot (connects to table)
14//!  │   └── Table name
15//!  └── Gate (action)
16//! ```
17
18use nom::{
19    branch::alt,
20    bytes::complete::{tag, take_while, take_while1},
21    character::complete::{char, digit1, multispace1, not_line_ending},
22    combinator::{map, opt, recognize, value},
23    multi::many0,
24    sequence::{pair, preceded, tuple},
25    IResult,
26};
27
28use crate::ast::*;
29use crate::error::{QailError, QailResult};
30
31/// Parse whitespace or comments.
32fn ws_or_comment(input: &str) -> IResult<&str, ()> {
33    value((), many0(alt((
34        value((), multispace1),
35        parse_comment,
36    ))))(input)
37}
38
39/// Parse a single comment line (// ... or -- ...).
40fn parse_comment(input: &str) -> IResult<&str, ()> {
41    value((), pair(alt((tag("//"), tag("--"))), not_line_ending))(input)
42}
43
44/// Parse a complete QAIL query string.
45pub fn parse(input: &str) -> QailResult<QailCmd> {
46    let input = input.trim();
47    
48    match parse_qail_cmd(input) {
49        Ok(("", cmd)) => Ok(cmd),
50        Ok((remaining, _)) => Err(QailError::parse(
51            input.len() - remaining.len(),
52            format!("Unexpected trailing content: '{}'", remaining),
53        )),
54        Err(e) => Err(QailError::parse(0, format!("Parse failed: {:?}", e))),
55    }
56}
57
58/// Parse the complete QAIL command.
59fn parse_qail_cmd(input: &str) -> IResult<&str, QailCmd> {
60    let (input, action) = parse_action(input)?;
61    let (input, _) = tag("::")(input)?;
62    let (input, table) = parse_identifier(input)?;
63    let (input, joins) = parse_joins(input)?;
64    let (input, _) = ws_or_comment(input)?; // Allow ws/comment before pivot
65    let (input, _) = opt(tag("•"))(input)?; // Pivot is optional if no columns
66    let (input, _) = ws_or_comment(input)?;
67    let (input, columns) = parse_columns(input)?;
68    let (input, _) = ws_or_comment(input)?;
69    let (input, cages) = parse_cages(input)?;
70
71    Ok((
72        input,
73        QailCmd {
74            action,
75            table: table.to_string(),
76            joins,
77            columns,
78            cages,
79        },
80    ))
81}
82
83/// Parse the action (get, set, del, add, gen).
84fn parse_action(input: &str) -> IResult<&str, Action> {
85    alt((
86        value(Action::Get, tag("get")),
87        value(Action::Set, tag("set")),
88        value(Action::Del, tag("del")),
89        value(Action::Add, tag("add")),
90        value(Action::Gen, tag("gen")),
91        value(Action::Make, tag("make")),
92        value(Action::Mod, tag("mod")),
93        value(Action::Over, tag("over")),
94        value(Action::With, tag("with")),
95    ))(input)
96}
97
98/// Parse an identifier (table name, column name).
99fn parse_identifier(input: &str) -> IResult<&str, &str> {
100    take_while1(|c: char| c.is_alphanumeric() || c == '_')(input)
101}
102
103fn parse_joins(input: &str) -> IResult<&str, Vec<Join>> {
104    many0(map(preceded(preceded(ws_or_comment, tag("->")), parse_identifier), |t| Join {
105        table: t.to_string(),
106        kind: JoinKind::Inner,
107    }))(input)
108}
109
110/// Parse columns (hooks).
111fn parse_columns(input: &str) -> IResult<&str, Vec<Column>> {
112    many0(preceded(ws_or_comment, parse_any_column))(input)
113}
114
115fn parse_any_column(input: &str) -> IResult<&str, Column> {
116    alt((
117        // Standard Hook: @col...
118        preceded(char('@'), parse_at_column),
119        // Add Hook: +col...
120        preceded(char('+'), parse_add_column),
121        // Drop Hook: -col... (can also be @-col if user mixes styles, but strict parser uses -)
122        preceded(char('-'), parse_drop_column),
123    ))(input)
124}
125
126fn parse_at_column(input: &str) -> IResult<&str, Column> {
127    alt((
128        value(Column::Star, char('*')),
129        // Check for drop via @-name convention if needed, essentially mapping @-name to Mod Drop
130        map(preceded(char('-'), parse_identifier), |name| Column::Mod { 
131            kind: ModKind::Drop, 
132            col: Box::new(Column::Named(name.to_string())) 
133        }),
134        parse_column_full_def_or_named, 
135    ))(input)
136}
137
138fn parse_add_column(input: &str) -> IResult<&str, Column> {
139    map(parse_column_full_def_or_named, |col| Column::Mod {
140        kind: ModKind::Add,
141        col: Box::new(col),
142    })(input)
143}
144
145fn parse_drop_column(input: &str) -> IResult<&str, Column> {
146    map(parse_identifier, |name| Column::Mod {
147        kind: ModKind::Drop,
148        col: Box::new(Column::Named(name.to_string())),
149    })(input)
150}
151
152fn parse_column_full_def_or_named(input: &str) -> IResult<&str, Column> {
153    // 1. Parse Name
154    let (input, name) = parse_identifier(input)?;
155    
156    // 2. Opt: Aggregates (#func)
157    if let Ok((input, Some(func))) = opt(preceded(char('#'), parse_agg_func))(input) {
158        return Ok((input, Column::Aggregate {
159             col: name.to_string(),
160             func
161        }));
162    }
163    
164    // 3. Opt: check for colon
165    if let Ok((input, _)) = char::<_, nom::error::Error<&str>>(':')(input) {
166        // We have a type OR a window function.
167        let (input, type_or_func) = parse_identifier(input)?;
168        
169        let (input, _) = ws_or_comment(input)?;
170        
171        // Peek/Check for open paren `(`
172        if let Ok((input, _)) = char::<_, nom::error::Error<&str>>('(')(input) {
173            // It IS a function call -> Window Column
174            // We sat on `(`, so continue parsing args
175            let (input, _) = ws_or_comment(input)?;
176            let (input, args) = opt(tuple((
177                parse_value,
178                many0(preceded(
179                    tuple((ws_or_comment, char(','), ws_or_comment)),
180                    parse_value
181                ))
182            )))(input)?;
183            let (input, _) = ws_or_comment(input)?;
184            let (input, _) = char(')')(input)?;
185            
186            let params = match args {
187                Some((first, mut rest)) => {
188                    let mut v = vec![first];
189                    v.append(&mut rest);
190                    v
191                },
192                None => vec![],
193            };
194
195            // Parse Order Cages (e.g. ^!amount) which are technically sort cages
196            let (input, sorts) = many0(parse_sort_cage)(input)?;
197            
198            // Parse Partition: {Part=...}
199            let (input, partitions) = opt(parse_partition_block)(input)?;
200            let partition = partitions.unwrap_or_default();
201
202            return Ok((input, Column::Window {
203                name: name.to_string(),
204                func: type_or_func.to_string(),
205                params,
206                partition,
207                order: sorts,
208            }));
209        } else {
210            // It is just a Type Definition
211            // Parse Constraints
212            let (input, constraints) = parse_constraints(input)?;
213            
214            return Ok((input, Column::Def { 
215                name: name.to_string(), 
216                data_type: type_or_func.to_string(), 
217                constraints 
218            }));
219        }
220    }
221    
222    // No colon, check for constraints (inferred type Def)
223    let (input, constraints) = parse_constraints(input)?;
224    if !constraints.is_empty() {
225         Ok((input, Column::Def { 
226            name: name.to_string(), 
227            data_type: "str".to_string(), 
228            constraints 
229        }))
230    } else {
231        // Just a named column
232        Ok((input, Column::Named(name.to_string())))
233    }
234}
235
236fn parse_constraints(input: &str) -> IResult<&str, Vec<Constraint>> {
237    many0(alt((
238        value(Constraint::PrimaryKey, tag("^pk")),
239        value(Constraint::Unique, tag("^uniq")),
240        value(Constraint::Nullable, char('?')),
241    )))(input)
242}
243
244fn parse_agg_func(input: &str) -> IResult<&str, AggregateFunc> {
245    alt((
246        value(AggregateFunc::Count, tag("count")),
247        value(AggregateFunc::Sum, tag("sum")),
248        value(AggregateFunc::Avg, tag("avg")),
249        value(AggregateFunc::Min, tag("min")),
250        value(AggregateFunc::Max, tag("max")),
251    ))(input)
252}
253
254/// Parse all cages.
255fn parse_cages(input: &str) -> IResult<&str, Vec<Cage>> {
256    many0(preceded(ws_or_comment, parse_cage))(input)
257}
258
259/// Parse a single cage [...].
260fn parse_cage(input: &str) -> IResult<&str, Cage> {
261    let (input, _) = char('[')(input)?;
262    let (input, _) = ws_or_comment(input)?;
263    
264    // Check for special cage types
265    if let Ok((remaining, cage)) = parse_limit_cage(input) {
266        let (remaining, _) = ws_or_comment(remaining)?;
267        let (remaining, _) = char(']')(remaining)?;
268        return Ok((remaining, cage));
269    }
270    
271    if let Ok((remaining, cage)) = parse_sort_cage(input) {
272        let (remaining, _) = ws_or_comment(remaining)?;
273        let (remaining, _) = char(']')(remaining)?;
274        return Ok((remaining, cage));
275    }
276    
277    // Otherwise, parse as filter conditions
278    let (input, (conditions, logical_op)) = parse_conditions(input)?;
279    let (input, _) = ws_or_comment(input)?;
280    let (input, _) = char(']')(input)?;
281    
282    Ok((
283        input,
284        Cage {
285            kind: CageKind::Filter,
286            conditions,
287            logical_op,
288        },
289    ))
290}
291
292/// Parse limit cage [lim=N].
293fn parse_limit_cage(input: &str) -> IResult<&str, Cage> {
294    let (input, _) = tag("lim")(input)?;
295    let (input, _) = ws_or_comment(input)?;
296    let (input, _) = char('=')(input)?;
297    let (input, _) = ws_or_comment(input)?;
298    let (input, n) = digit1(input)?;
299    
300    Ok((
301        input,
302        Cage {
303            kind: CageKind::Limit(n.parse().unwrap_or(10)),
304            conditions: vec![],
305            logical_op: LogicalOp::And,
306        },
307    ))
308}
309
310/// Parse sort cage [^col] or [^!col].
311fn parse_sort_cage(input: &str) -> IResult<&str, Cage> {
312    let (input, _) = char('^')(input)?;
313    let (input, desc) = opt(char('!'))(input)?;
314    let (input, col) = parse_identifier(input)?;
315    
316    let order = if desc.is_some() {
317        SortOrder::Desc
318    } else {
319        SortOrder::Asc
320    };
321    
322    Ok((
323        input,
324        Cage {
325            kind: CageKind::Sort(order),
326            conditions: vec![Condition {
327                column: col.to_string(),
328                op: Operator::Eq,
329                value: Value::Null,
330                is_array_unnest: false,
331            }],
332            logical_op: LogicalOp::And,
333        },
334    ))
335}
336
337/// Parse conditions within a cage, returning both conditions and the logical operator.
338fn parse_conditions(input: &str) -> IResult<&str, (Vec<Condition>, LogicalOp)> {
339    // Parse first condition
340    let (input, first) = parse_condition(input)?;
341    let mut conditions = vec![first];
342    let mut logical_op = LogicalOp::And;
343    
344    // Parse remaining conditions with their operators
345    let mut remaining = input;
346    loop {
347        // Skip whitespace
348        let (input, _) = ws_or_comment(remaining)?;
349        
350        // Check for operator character
351        let first_char = input.chars().next();
352        match first_char {
353            Some('|') => {
354                logical_op = LogicalOp::Or;
355                let input = &input[1..]; // consume '|'
356                let (input, _) = ws_or_comment(input)?;
357                let (input, cond) = parse_condition(input)?;
358                conditions.push(cond);
359                remaining = input;
360            }
361            Some('&') => {
362                let input = &input[1..]; // consume '&'
363                let (input, _) = ws_or_comment(input)?;
364                let (input, cond) = parse_condition(input)?;
365                conditions.push(cond);
366                remaining = input;
367            }
368            _ => break,
369        }
370    }
371    
372    Ok((remaining, (conditions, logical_op)))
373}
374
375/// Parse a single condition.
376fn parse_condition(input: &str) -> IResult<&str, Condition> {
377    let (input, column) = parse_identifier(input)?;
378    
379    // Check for array unnest syntax: column[*]
380    let (input, is_array_unnest) = if input.starts_with("[*]") {
381        (&input[3..], true) // consume "[*]"
382    } else {
383        (input, false)
384    };
385    
386    let (input, _) = ws_or_comment(input)?;
387    let (input, (op, value)) = parse_operator_and_value(input)?;
388    
389    Ok((
390        input,
391        Condition {
392            column: column.to_string(),
393            op,
394            value,
395            is_array_unnest,
396        },
397    ))
398}
399
400/// Parse operator and value together.
401fn parse_operator_and_value(input: &str) -> IResult<&str, (Operator, Value)> {
402    alt((
403        // Fuzzy match: ~value
404        map(preceded(char('~'), parse_value), |v| (Operator::Fuzzy, v)),
405        // Greater than or equal: >=value
406        map(preceded(tag(">="), parse_value), |v| (Operator::Gte, v)),
407        // Less than or equal: <=value
408        map(preceded(tag("<="), parse_value), |v| (Operator::Lte, v)),
409        // Not equal: !=value
410        map(preceded(tag("!="), parse_value), |v| (Operator::Ne, v)),
411        // Greater than: >value
412        map(preceded(char('>'), parse_value), |v| (Operator::Gt, v)),
413        // Less than: <value
414        map(preceded(char('<'), parse_value), |v| (Operator::Lt, v)),
415        // Equal: =value
416        map(preceded(char('='), parse_value), |v| (Operator::Eq, v)),
417    ))(input)
418}
419
420/// Parse a value.
421fn parse_value(input: &str) -> IResult<&str, Value> {
422    let (input, _) = ws_or_comment(input)?;
423    
424    alt((
425        // Parameter: $1, $2, etc.
426        map(preceded(char('$'), digit1), |n: &str| {
427            Value::Param(n.parse().unwrap_or(1))
428        }),
429        // Boolean: true/false
430        value(Value::Bool(true), tag("true")),
431        value(Value::Bool(false), tag("false")),
432        // Function call: name(args)
433        parse_function_call,
434        // Function without parens: now, etc. (keyword-like)
435        map(tag("now"), |_| Value::Function("now".to_string())),
436        // Number (float or int)
437        parse_number,
438        // String
439        parse_quoted_string,
440        // Bare identifier (treated as string)
441        map(parse_identifier, |s| Value::String(s.to_string())),
442    ))(input)
443}
444
445/// Parse function call: name(arg1, arg2)
446fn parse_function_call(input: &str) -> IResult<&str, Value> {
447    let (input, name) = parse_identifier(input)?;
448    let (input, _) = char('(')(input)?;
449    let (input, _) = ws_or_comment(input)?;
450    let (input, args) = opt(tuple((
451        parse_value,
452        many0(preceded(
453            tuple((ws_or_comment, char(','), ws_or_comment)),
454            parse_value
455        ))
456    )))(input)?;
457    let (input, _) = ws_or_comment(input)?;
458    let (input, _) = char(')')(input)?;
459
460    let params = match args {
461        Some((first, mut rest)) => {
462            let mut v = vec![first];
463            v.append(&mut rest);
464            v
465        },
466        None => vec![],
467    };
468
469    // If it's a known function that returns a value type we strictly handle, we might map it.
470    // For now, Value::Function stores name and args? 
471    // Wait, Value::Function(String) only stores name! 
472    // We need to update Value::Function to store params or serialize as string?
473    // Current Ast: Value::Function(String). usage `now()`.
474    // If I change AST Value::Function, I break deserialization potentially or need large refactor.
475    // For `rank()`, it's a valid Value?? 
476    // Actually, `Value` is for conditions `WHERE col = val`.
477    // Window Func is in `Column`. `Column::Window` has `params: Vec<Value>`.
478    
479    // So parse_function_call should return (String, Vec<Value>) not Value.
480    // But parse_value needs to return Value.
481    // Let's keep parse_value returns Value::Function(name) for simple 0-arg funcs.
482    // For parsing Window Columns, we use a dedicated parser.
483    Ok((input, Value::Function(format!("{}({})", name, params.iter().map(|v| v.to_string()).collect::<Vec<_>>().join(", ")))))
484}
485
486/// Parse a number (integer or float).
487fn parse_number(input: &str) -> IResult<&str, Value> {
488    let (input, num_str) = recognize(tuple((
489        opt(char('-')),
490        digit1,
491        opt(pair(char('.'), digit1)),
492    )))(input)?;
493    
494    if num_str.contains('.') {
495        Ok((input, Value::Float(num_str.parse().unwrap_or(0.0))))
496    } else {
497        Ok((input, Value::Int(num_str.parse().unwrap_or(0))))
498    }
499}
500
501/// Parse a quoted string.
502fn parse_quoted_string(input: &str) -> IResult<&str, Value> {
503    let (input, _) = char('\'')(input)?;
504    let (input, content) = take_while(|c| c != '\'')(input)?;
505    let (input, _) = char('\'')(input)?;
506    
507    Ok((input, Value::String(content.to_string())))
508}
509
510/// Parse Window Column Definition: @name:func(args)^sort{Part=...}
511fn parse_window_column(input: &str) -> IResult<&str, Column> {
512    // 1. Parse Name
513    let (input, name) = parse_identifier(input)?;
514    let (input, _) = char(':')(input)?;
515    
516    // 2. Parse Function Call (name + params)
517    let (input, func_name) = parse_identifier(input)?;
518    let (input, _) = char('(')(input)?;
519    let (input, _) = ws_or_comment(input)?;
520    let (input, args) = opt(tuple((
521        parse_value,
522        many0(preceded(
523            tuple((ws_or_comment, char(','), ws_or_comment)),
524            parse_value
525        ))
526    )))(input)?;
527    let (input, _) = ws_or_comment(input)?;
528    let (input, _) = char(')')(input)?;
529    
530    let params = match args {
531        Some((first, mut rest)) => {
532            let mut v = vec![first];
533            v.append(&mut rest);
534            v
535        },
536        None => vec![],
537    };
538
539    // 3. Parse Order Cages (e.g. ^!amount)
540    let (input, sorts) = many0(parse_sort_cage)(input)?;
541    
542    // 4. Parse Partition: {Part=col1,col2}
543    let (input, partitions) = opt(parse_partition_block)(input)?;
544    let partition = partitions.unwrap_or_default();
545
546    Ok((input, Column::Window {
547        name: name.to_string(),
548        func: func_name.to_string(),
549        params,
550        partition,
551        order: sorts,
552    }))
553}
554
555fn parse_partition_block(input: &str) -> IResult<&str, Vec<String>> {
556    let (input, _) = char('{')(input)?;
557    let (input, _) = ws_or_comment(input)?;
558    let (input, _) = tag("Part")(input)?; // Case sensitive?
559    let (input, _) = ws_or_comment(input)?;
560    let (input, _) = char('=')(input)?;
561    let (input, _) = ws_or_comment(input)?;
562    
563    let (input, first) = parse_identifier(input)?;
564    let (input, mut rest) = many0(preceded(
565        tuple((ws_or_comment, char(','), ws_or_comment)),
566        parse_identifier
567    ))(input)?;
568    
569    let (input, _) = ws_or_comment(input)?;
570    let (input, _) = char('}')(input)?;
571    
572    let mut cols = vec![first.to_string()];
573    cols.append(&mut rest.iter().map(|s| s.to_string()).collect());
574    Ok((input, cols))
575}
576
#[cfg(test)]
mod tests {
    use super::*;

    /// `@*` selects every column.
    #[test]
    fn test_simple_get() {
        let parsed = parse("get::users•@*").unwrap();

        assert_eq!(parsed.action, Action::Get);
        assert_eq!(parsed.table, "users");
        assert_eq!(parsed.columns, vec![Column::Star]);
    }

    /// Several `@col` hooks in a row become named columns, in order.
    #[test]
    fn test_get_with_columns() {
        let parsed = parse("get::users•@id@email@role").unwrap();
        let expected_columns = vec![
            Column::Named("id".to_string()),
            Column::Named("email".to_string()),
            Column::Named("role".to_string()),
        ];

        assert_eq!(parsed.action, Action::Get);
        assert_eq!(parsed.table, "users");
        assert_eq!(parsed.columns, expected_columns);
    }

    /// A `[col=value]` cage is a filter with one equality condition.
    #[test]
    fn test_get_with_filter() {
        let parsed = parse("get::users•@*[active=true]").unwrap();
        assert_eq!(parsed.cages.len(), 1);

        let cage = &parsed.cages[0];
        assert_eq!(cage.kind, CageKind::Filter);
        assert_eq!(cage.conditions.len(), 1);

        let condition = &cage.conditions[0];
        assert_eq!(condition.column, "active");
        assert_eq!(condition.op, Operator::Eq);
        assert_eq!(condition.value, Value::Bool(true));
    }

    /// `[lim=N]` is a limit cage.
    #[test]
    fn test_get_with_limit() {
        let parsed = parse("get::users•@*[lim=10]").unwrap();

        assert_eq!(parsed.cages.len(), 1);
        assert_eq!(parsed.cages[0].kind, CageKind::Limit(10));
    }

    /// `[^!col]` is a descending sort cage.
    #[test]
    fn test_get_with_sort_desc() {
        let parsed = parse("get::users•@*[^!created_at]").unwrap();

        assert_eq!(parsed.cages.len(), 1);
        assert_eq!(parsed.cages[0].kind, CageKind::Sort(SortOrder::Desc));
    }

    /// A `set` command with no columns and two cages.
    #[test]
    fn test_set_command() {
        let parsed = parse("set::users•[verified=true][id=$1]").unwrap();

        assert_eq!(parsed.action, Action::Set);
        assert_eq!(parsed.table, "users");
        assert_eq!(parsed.cages.len(), 2);
    }

    /// A `del` command whose filter uses the bare `now` keyword.
    #[test]
    fn test_del_command() {
        let parsed = parse("del::sessions•[expired_at<now]").unwrap();

        assert_eq!(parsed.action, Action::Del);
        assert_eq!(parsed.table, "sessions");
    }

    /// `~` is the fuzzy-match operator.
    #[test]
    fn test_fuzzy_match() {
        let parsed = parse("get::users•@*[name~$1]").unwrap();
        let condition = &parsed.cages[0].conditions[0];

        assert_eq!(condition.op, Operator::Fuzzy);
    }

    /// Columns, a filter and a limit combined in one query.
    #[test]
    fn test_complex_query() {
        let parsed = parse("get::users•@id@email@role[active=true][lim=10]").unwrap();

        assert_eq!(parsed.action, Action::Get);
        assert_eq!(parsed.table, "users");
        assert_eq!(parsed.columns.len(), 3);
        assert_eq!(parsed.cages.len(), 2);
    }

    /// `$1` in a filter is parsed as positional parameter 1.
    #[test]
    fn test_param_in_filter() {
        let parsed = parse("get::users•@*[id=$1]").unwrap();

        assert_eq!(parsed.cages.len(), 1);
        assert_eq!(parsed.cages[0].conditions[0].value, Value::Param(1));
    }

    /// In an update, the `$1` parameter lives in the second cage.
    #[test]
    fn test_param_in_update() {
        let parsed = parse("set::users•[verified=true][id=$1]").unwrap();

        assert_eq!(parsed.action, Action::Set);
        assert_eq!(parsed.cages.len(), 2);

        let where_cage = &parsed.cages[1];
        assert_eq!(where_cage.conditions[0].value, Value::Param(1));
    }
}
675