qail_core/
parser.rs

1//! QAIL Parser using nom.
2//!
3//! Parses QAIL syntax into an AST.
4//!
5//! # Syntax Overview
6//!
7//! ```text
8//! get::users•@id@email[active=true][lim=10]
9//! ─┬─ ─┬─  ┬ ─────┬───── ─────┬──────────
10//!  │   │   │      │           │
11//!  │   │   │      │           └── Cages (filters, limits)
12//!  │   │   │      └── Hooks (columns)
13//!  │   │   └── Pivot (connects to table)
14//!  │   └── Table name
15//!  └── Gate (action)
16//! ```
17
18use nom::{
19    branch::alt,
20    bytes::complete::{tag, take_while, take_while1},
21    character::complete::{char, digit1, multispace1, not_line_ending},
22    combinator::{map, opt, recognize, value},
23    multi::many0,
24    sequence::{pair, preceded, tuple},
25    IResult,
26};
27
28use crate::ast::*;
29use crate::error::{QailError, QailResult};
30
31/// Parse whitespace or comments.
32fn ws_or_comment(input: &str) -> IResult<&str, ()> {
33    value((), many0(alt((
34        value((), multispace1),
35        parse_comment,
36    ))))(input)
37}
38
39/// Parse a single comment line (// ... or -- ...).
40fn parse_comment(input: &str) -> IResult<&str, ()> {
41    value((), pair(alt((tag("//"), tag("--"))), not_line_ending))(input)
42}
43
44/// Parse a complete QAIL query string.
45pub fn parse(input: &str) -> QailResult<QailCmd> {
46    let input = input.trim();
47    
48    match parse_qail_cmd(input) {
49        Ok(("", cmd)) => Ok(cmd),
50        Ok((remaining, _)) => Err(QailError::parse(
51            input.len() - remaining.len(),
52            format!("Unexpected trailing content: '{}'", remaining),
53        )),
54        Err(e) => Err(QailError::parse(0, format!("Parse failed: {:?}", e))),
55    }
56}
57
58/// Parse the complete QAIL command.
59fn parse_qail_cmd(input: &str) -> IResult<&str, QailCmd> {
60    let (input, action) = parse_action(input)?;
61    let (input, _) = tag("::")(input)?;
62    let (input, table) = parse_identifier(input)?;
63    let (input, joins) = parse_joins(input)?;
64    let (input, _) = ws_or_comment(input)?; // Allow ws/comment before pivot
65    let (input, _) = opt(tag("•"))(input)?; // Pivot is optional if no columns
66    let (input, _) = ws_or_comment(input)?;
67    let (input, columns) = parse_columns(input)?;
68    let (input, _) = ws_or_comment(input)?;
69    let (input, cages) = parse_cages(input)?;
70
71    Ok((
72        input,
73        QailCmd {
74            action,
75            table: table.to_string(),
76            joins,
77            columns,
78            cages,
79        },
80    ))
81}
82
83/// Parse the action (get, set, del, add, gen).
84fn parse_action(input: &str) -> IResult<&str, Action> {
85    alt((
86        value(Action::Get, tag("get")),
87        value(Action::Set, tag("set")),
88        value(Action::Del, tag("del")),
89        value(Action::Add, tag("add")),
90        value(Action::Gen, tag("gen")),
91        value(Action::Make, tag("make")),
92        value(Action::Mod, tag("mod")),
93    ))(input)
94}
95
96/// Parse an identifier (table name, column name).
97fn parse_identifier(input: &str) -> IResult<&str, &str> {
98    take_while1(|c: char| c.is_alphanumeric() || c == '_')(input)
99}
100
101fn parse_joins(input: &str) -> IResult<&str, Vec<Join>> {
102    many0(map(preceded(preceded(ws_or_comment, tag("->")), parse_identifier), |t| Join {
103        table: t.to_string(),
104        kind: JoinKind::Inner,
105    }))(input)
106}
107
108/// Parse columns (hooks).
109fn parse_columns(input: &str) -> IResult<&str, Vec<Column>> {
110    many0(preceded(ws_or_comment, parse_any_column))(input)
111}
112
113fn parse_any_column(input: &str) -> IResult<&str, Column> {
114    alt((
115        // Standard Hook: @col...
116        preceded(char('@'), parse_at_column),
117        // Add Hook: +col...
118        preceded(char('+'), parse_add_column),
119        // Drop Hook: -col... (can also be @-col if user mixes styles, but strict parser uses -)
120        preceded(char('-'), parse_drop_column),
121    ))(input)
122}
123
124fn parse_at_column(input: &str) -> IResult<&str, Column> {
125    alt((
126        value(Column::Star, char('*')),
127        // Check for drop via @-name convention if needed, essentially mapping @-name to Mod Drop
128        map(preceded(char('-'), parse_identifier), |name| Column::Mod { 
129            kind: ModKind::Drop, 
130            col: Box::new(Column::Named(name.to_string())) 
131        }),
132        parse_column_full_def_or_named, 
133    ))(input)
134}
135
136fn parse_add_column(input: &str) -> IResult<&str, Column> {
137    map(parse_column_full_def_or_named, |col| Column::Mod {
138        kind: ModKind::Add,
139        col: Box::new(col),
140    })(input)
141}
142
143fn parse_drop_column(input: &str) -> IResult<&str, Column> {
144    map(parse_identifier, |name| Column::Mod {
145        kind: ModKind::Drop,
146        col: Box::new(Column::Named(name.to_string())),
147    })(input)
148}
149
150fn parse_column_full_def_or_named(input: &str) -> IResult<&str, Column> {
151    // 1. Parse Name
152    let (input, name) = parse_identifier(input)?;
153    
154    // 2. Opt: Aggregates (#func)
155    if let Ok((input, Some(func))) = opt(preceded(char('#'), parse_agg_func))(input) {
156        return Ok((input, Column::Aggregate {
157             col: name.to_string(),
158             func
159        }));
160    }
161    
162    // 3. Opt: Type Definition (:type)
163    let (input, data_type) = opt(preceded(char(':'), parse_identifier))(input)?;
164    
165    // 4. Opt: Constraints (^pk, ^uniq, ?)
166    let (input, constraints) = parse_constraints(input)?;
167    
168    if let Some(dt) = data_type {
169        // It's a Definition
170        Ok((input, Column::Def { 
171            name: name.to_string(), 
172            data_type: dt.to_string(), 
173            constraints 
174        }))
175    } else if !constraints.is_empty() {
176         // Has constraints but no type? Assume inferred or default, treat as Def
177         Ok((input, Column::Def { 
178            name: name.to_string(), 
179            data_type: "str".to_string(), // Default or error? For now default strict, maybe str
180            constraints 
181        }))
182    } else {
183        // Just a named column
184        Ok((input, Column::Named(name.to_string())))
185    }
186}
187
188fn parse_constraints(input: &str) -> IResult<&str, Vec<Constraint>> {
189    many0(alt((
190        value(Constraint::PrimaryKey, tag("^pk")),
191        value(Constraint::Unique, tag("^uniq")),
192        value(Constraint::Nullable, char('?')),
193    )))(input)
194}
195
196fn parse_agg_func(input: &str) -> IResult<&str, AggregateFunc> {
197    alt((
198        value(AggregateFunc::Count, tag("count")),
199        value(AggregateFunc::Sum, tag("sum")),
200        value(AggregateFunc::Avg, tag("avg")),
201        value(AggregateFunc::Min, tag("min")),
202        value(AggregateFunc::Max, tag("max")),
203    ))(input)
204}
205
206/// Parse all cages.
207fn parse_cages(input: &str) -> IResult<&str, Vec<Cage>> {
208    many0(preceded(ws_or_comment, parse_cage))(input)
209}
210
211/// Parse a single cage [...].
212fn parse_cage(input: &str) -> IResult<&str, Cage> {
213    let (input, _) = char('[')(input)?;
214    let (input, _) = ws_or_comment(input)?;
215    
216    // Check for special cage types
217    if let Ok((remaining, cage)) = parse_limit_cage(input) {
218        let (remaining, _) = ws_or_comment(remaining)?;
219        let (remaining, _) = char(']')(remaining)?;
220        return Ok((remaining, cage));
221    }
222    
223    if let Ok((remaining, cage)) = parse_sort_cage(input) {
224        let (remaining, _) = ws_or_comment(remaining)?;
225        let (remaining, _) = char(']')(remaining)?;
226        return Ok((remaining, cage));
227    }
228    
229    // Otherwise, parse as filter conditions
230    let (input, (conditions, logical_op)) = parse_conditions(input)?;
231    let (input, _) = ws_or_comment(input)?;
232    let (input, _) = char(']')(input)?;
233    
234    Ok((
235        input,
236        Cage {
237            kind: CageKind::Filter,
238            conditions,
239            logical_op,
240        },
241    ))
242}
243
244/// Parse limit cage [lim=N].
245fn parse_limit_cage(input: &str) -> IResult<&str, Cage> {
246    let (input, _) = tag("lim")(input)?;
247    let (input, _) = ws_or_comment(input)?;
248    let (input, _) = char('=')(input)?;
249    let (input, _) = ws_or_comment(input)?;
250    let (input, n) = digit1(input)?;
251    
252    Ok((
253        input,
254        Cage {
255            kind: CageKind::Limit(n.parse().unwrap_or(10)),
256            conditions: vec![],
257            logical_op: LogicalOp::And,
258        },
259    ))
260}
261
262/// Parse sort cage [^col] or [^!col].
263fn parse_sort_cage(input: &str) -> IResult<&str, Cage> {
264    let (input, _) = char('^')(input)?;
265    let (input, desc) = opt(char('!'))(input)?;
266    let (input, col) = parse_identifier(input)?;
267    
268    let order = if desc.is_some() {
269        SortOrder::Desc
270    } else {
271        SortOrder::Asc
272    };
273    
274    Ok((
275        input,
276        Cage {
277            kind: CageKind::Sort(order),
278            conditions: vec![Condition {
279                column: col.to_string(),
280                op: Operator::Eq,
281                value: Value::Null,
282                is_array_unnest: false,
283            }],
284            logical_op: LogicalOp::And,
285        },
286    ))
287}
288
289/// Parse conditions within a cage, returning both conditions and the logical operator.
290fn parse_conditions(input: &str) -> IResult<&str, (Vec<Condition>, LogicalOp)> {
291    // Parse first condition
292    let (input, first) = parse_condition(input)?;
293    let mut conditions = vec![first];
294    let mut logical_op = LogicalOp::And;
295    
296    // Parse remaining conditions with their operators
297    let mut remaining = input;
298    loop {
299        // Skip whitespace
300        let (input, _) = ws_or_comment(remaining)?;
301        
302        // Check for operator character
303        let first_char = input.chars().next();
304        match first_char {
305            Some('|') => {
306                logical_op = LogicalOp::Or;
307                let input = &input[1..]; // consume '|'
308                let (input, _) = ws_or_comment(input)?;
309                let (input, cond) = parse_condition(input)?;
310                conditions.push(cond);
311                remaining = input;
312            }
313            Some('&') => {
314                let input = &input[1..]; // consume '&'
315                let (input, _) = ws_or_comment(input)?;
316                let (input, cond) = parse_condition(input)?;
317                conditions.push(cond);
318                remaining = input;
319            }
320            _ => break,
321        }
322    }
323    
324    Ok((remaining, (conditions, logical_op)))
325}
326
327/// Parse a single condition.
328fn parse_condition(input: &str) -> IResult<&str, Condition> {
329    let (input, column) = parse_identifier(input)?;
330    
331    // Check for array unnest syntax: column[*]
332    let (input, is_array_unnest) = if input.starts_with("[*]") {
333        (&input[3..], true) // consume "[*]"
334    } else {
335        (input, false)
336    };
337    
338    let (input, _) = ws_or_comment(input)?;
339    let (input, (op, value)) = parse_operator_and_value(input)?;
340    
341    Ok((
342        input,
343        Condition {
344            column: column.to_string(),
345            op,
346            value,
347            is_array_unnest,
348        },
349    ))
350}
351
352/// Parse operator and value together.
353fn parse_operator_and_value(input: &str) -> IResult<&str, (Operator, Value)> {
354    alt((
355        // Fuzzy match: ~value
356        map(preceded(char('~'), parse_value), |v| (Operator::Fuzzy, v)),
357        // Greater than or equal: >=value
358        map(preceded(tag(">="), parse_value), |v| (Operator::Gte, v)),
359        // Less than or equal: <=value
360        map(preceded(tag("<="), parse_value), |v| (Operator::Lte, v)),
361        // Not equal: !=value
362        map(preceded(tag("!="), parse_value), |v| (Operator::Ne, v)),
363        // Greater than: >value
364        map(preceded(char('>'), parse_value), |v| (Operator::Gt, v)),
365        // Less than: <value
366        map(preceded(char('<'), parse_value), |v| (Operator::Lt, v)),
367        // Equal: =value
368        map(preceded(char('='), parse_value), |v| (Operator::Eq, v)),
369    ))(input)
370}
371
372/// Parse a value.
373fn parse_value(input: &str) -> IResult<&str, Value> {
374    let (input, _) = ws_or_comment(input)?;
375    
376    alt((
377        // Parameter: $1, $2, etc.
378        map(preceded(char('$'), digit1), |n: &str| {
379            Value::Param(n.parse().unwrap_or(1))
380        }),
381        // Boolean: true/false
382        value(Value::Bool(true), tag("true")),
383        value(Value::Bool(false), tag("false")),
384        // Function: name()
385        map(
386            recognize(pair(parse_identifier, tag("()"))),
387            |s: &str| Value::Function(s.trim_end_matches("()").to_string()),
388        ),
389        // Function without parens: now, etc.
390        map(tag("now"), |_| Value::Function("now".to_string())),
391        // Number (float or int)
392        parse_number,
393        // Quoted string
394        parse_quoted_string,
395        // Bare identifier (treated as string)
396        map(parse_identifier, |s| Value::String(s.to_string())),
397    ))(input)
398}
399
400/// Parse a number (integer or float).
401fn parse_number(input: &str) -> IResult<&str, Value> {
402    let (input, num_str) = recognize(tuple((
403        opt(char('-')),
404        digit1,
405        opt(pair(char('.'), digit1)),
406    )))(input)?;
407    
408    if num_str.contains('.') {
409        Ok((input, Value::Float(num_str.parse().unwrap_or(0.0))))
410    } else {
411        Ok((input, Value::Int(num_str.parse().unwrap_or(0))))
412    }
413}
414
415/// Parse a quoted string.
416fn parse_quoted_string(input: &str) -> IResult<&str, Value> {
417    let (input, _) = char('\'')(input)?;
418    let (input, content) = take_while(|c| c != '\'')(input)?;
419    let (input, _) = char('\'')(input)?;
420    
421    Ok((input, Value::String(content.to_string())))
422}
423
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `query`, panicking with the parser's error if it is rejected.
    fn parsed(query: &str) -> QailCmd {
        parse(query).unwrap()
    }

    // A bare `@*` selects every column.
    #[test]
    fn test_simple_get() {
        let cmd = parsed("get::users•@*");
        assert_eq!(cmd.action, Action::Get);
        assert_eq!(cmd.table, "users");
        assert_eq!(cmd.columns, vec![Column::Star]);
    }

    // Consecutive `@name` hooks become named columns, in order.
    #[test]
    fn test_get_with_columns() {
        let cmd = parsed("get::users•@id@email@role");
        let expected = vec![
            Column::Named("id".to_string()),
            Column::Named("email".to_string()),
            Column::Named("role".to_string()),
        ];
        assert_eq!(cmd.action, Action::Get);
        assert_eq!(cmd.table, "users");
        assert_eq!(cmd.columns, expected);
    }

    // A `[col=value]` cage is a filter with one equality condition.
    #[test]
    fn test_get_with_filter() {
        let cmd = parsed("get::users•@*[active=true]");
        assert_eq!(cmd.cages.len(), 1);

        let cage = &cmd.cages[0];
        assert_eq!(cage.kind, CageKind::Filter);
        assert_eq!(cage.conditions.len(), 1);

        let cond = &cage.conditions[0];
        assert_eq!(cond.column, "active");
        assert_eq!(cond.op, Operator::Eq);
        assert_eq!(cond.value, Value::Bool(true));
    }

    // `[lim=N]` is recognised as a limit cage, not a filter.
    #[test]
    fn test_get_with_limit() {
        let cmd = parsed("get::users•@*[lim=10]");
        assert_eq!(cmd.cages.len(), 1);
        assert_eq!(cmd.cages[0].kind, CageKind::Limit(10));
    }

    // `[^!col]` sorts descending.
    #[test]
    fn test_get_with_sort_desc() {
        let cmd = parsed("get::users•@*[^!created_at]");
        assert_eq!(cmd.cages.len(), 1);
        assert_eq!(cmd.cages[0].kind, CageKind::Sort(SortOrder::Desc));
    }

    // `set` with no columns: both cages are kept.
    #[test]
    fn test_set_command() {
        let cmd = parsed("set::users•[verified=true][id=$1]");
        assert_eq!(cmd.action, Action::Set);
        assert_eq!(cmd.table, "users");
        assert_eq!(cmd.cages.len(), 2);
    }

    // `del` with a `<` comparison against the bare `now` keyword.
    #[test]
    fn test_del_command() {
        let cmd = parsed("del::sessions•[expired_at<now]");
        assert_eq!(cmd.action, Action::Del);
        assert_eq!(cmd.table, "sessions");
    }

    // `~` is the fuzzy-match operator.
    #[test]
    fn test_fuzzy_match() {
        let cmd = parsed("get::users•@*[name~$1]");
        let cond = &cmd.cages[0].conditions[0];
        assert_eq!(cond.op, Operator::Fuzzy);
    }

    // Columns, a filter and a limit combined in one query.
    #[test]
    fn test_complex_query() {
        let cmd = parsed("get::users•@id@email@role[active=true][lim=10]");
        assert_eq!(cmd.action, Action::Get);
        assert_eq!(cmd.table, "users");
        assert_eq!(cmd.columns.len(), 3);
        assert_eq!(cmd.cages.len(), 2);
    }

    // `$1` in a filter becomes a positional parameter.
    #[test]
    fn test_param_in_filter() {
        let cmd = parsed("get::users•@*[id=$1]");
        assert_eq!(cmd.cages.len(), 1);
        assert_eq!(cmd.cages[0].conditions[0].value, Value::Param(1));
    }

    // In an update, the second cage carries the `$1` parameter.
    #[test]
    fn test_param_in_update() {
        let cmd = parsed("set::users•[verified=true][id=$1]");
        assert_eq!(cmd.action, Action::Set);
        assert_eq!(cmd.cages.len(), 2);

        let where_cond = &cmd.cages[1].conditions[0];
        assert_eq!(where_cond.value, Value::Param(1));
    }
}
522