Skip to main content

qail_core/parser/grammar/
mod.rs

1/// Base parsing utilities (identifiers, literals, whitespace).
2pub mod base;
3/// Binary operator parsing (AND, OR, arithmetic).
4pub mod binary_ops;
5/// CASE WHEN expression parsing.
6pub mod case_when;
7/// Clause parsing (WHERE, ORDER BY, LIMIT, etc.).
8pub mod clauses;
9/// Common Table Expression (WITH) parsing.
10pub mod cte;
11/// Data Definition Language parsing (CREATE TABLE, INDEX).
12pub mod ddl;
13/// Data Manipulation Language parsing (INSERT values, ON CONFLICT).
14pub mod dml;
15/// Expression parsing (columns, functions, sub-expressions).
16pub mod expressions;
17/// Function call parsing.
18pub mod functions;
19/// JOIN clause parsing.
20pub mod joins;
21/// Special function parsing (COALESCE, NULLIF, GREATEST, etc.).
22pub mod special_funcs;
23
24use self::base::*;
25use self::clauses::*;
26use self::ddl::*;
27use self::dml::*;
28use self::joins::*;
29use crate::ast::*;
30use nom::{
31    IResult, Parser,
32    bytes::complete::tag_no_case,
33    character::complete::{multispace0, multispace1},
34    combinator::opt,
35    multi::many0,
36};
37// use self::expressions::*; // Used in clauses module
38
39/// Parse a QAIL query with comment preprocessing.
40/// This is the recommended entry point - handles SQL comment stripping
41/// and `table[filter]` shorthand desugaring.
42pub fn parse(input: &str) -> Result<Qail, String> {
43    let cleaned = strip_sql_comments(input);
44    // Desugar table[filter] shorthand: "set users[active = true] fields ..."
45    // → "set users fields ... where active = true"
46    let desugared = desugar_bracket_filter(&cleaned);
47    match parse_root(&desugared) {
48        Ok(("", cmd)) => Ok(cmd),
49        Ok((remaining, _)) => Err(format!("Unexpected trailing content: '{}'", remaining)),
50        Err(e) => Err(format!("Parse error: {:?}", e)),
51    }
52}
53
/// Desugar `table[filter]` shorthand into `table ... where filter`.
///
/// Transforms `action table[cond] rest` into `action table rest where cond`.
/// The generated condition is placed where the grammar expects a WHERE clause:
///
/// * if `rest` already has a top-level `where`, the filter is appended to it
///   with `AND`;
/// * if `rest` contains a trailing clause (`having`, `order`, `limit`,
///   `offset`), the condition is inserted *before* that clause rather than
///   after it, because the root parser reads WHERE before those clauses.
///
/// The input is returned (trimmed) unchanged when no shorthand is recognised:
/// no `[` outside of quoted literals, no space before the bracket, a clause
/// keyword before the bracket (the bracket then belongs to a value or
/// expression, e.g. `... where tags && '["a","b"]'`), or no matching `]`.
///
/// NOTE(review): when merging into an existing `where` the filter is joined
/// with a bare `AND`; if the existing condition contains a top-level `or`,
/// operator precedence may not be what the author intended.
fn desugar_bracket_filter(input: &str) -> String {
    // Keywords that, when seen before the bracket, mean the bracket is part of
    // a clause or value rather than attached to the table name.
    const CLAUSE_MARKERS: [&str; 8] = [
        " where ", " fields ", " values ", " having ", " order ", " limit ", " offset ", " join ",
    ];

    let trimmed = input.trim();

    // Brackets inside string literals are data, never table shorthand.
    let bracket_start = match first_unquoted_bracket(trimmed) {
        Some(pos) => pos,
        None => return trimmed.to_string(),
    };

    // There should be at least "action table" before the bracket.
    let before_bracket = &trimmed[..bracket_start];
    if !before_bracket.contains(' ') {
        return trimmed.to_string();
    }

    let before_lower = before_bracket.to_ascii_lowercase();
    if CLAUSE_MARKERS.iter().any(|marker| before_lower.contains(*marker)) {
        return trimmed.to_string();
    }

    let after_bracket = &trimmed[bracket_start + 1..];
    let end_pos = match matching_bracket_end(after_bracket) {
        Some(pos) => pos,
        None => return trimmed.to_string(),
    };

    let filter = &after_bracket[..end_pos];
    let rest = after_bracket[end_pos + 1..].trim();

    // Split `rest` into the part that precedes WHERE's position in the grammar
    // (`head`) and the clauses that must follow it (`tail`).
    let (has_where, tail_start) = scan_rest_clauses(rest);
    let (head, tail) = match tail_start {
        Some(pos) => (rest[..pos].trim_end(), &rest[pos..]),
        None => (rest, ""),
    };

    let mut out = String::with_capacity(trimmed.len() + " where ".len());
    out.push_str(before_bracket);
    if !head.is_empty() {
        out.push(' ');
        out.push_str(head);
    }
    out.push_str(if has_where { " AND " } else { " where " });
    out.push_str(filter);
    if !tail.is_empty() {
        out.push(' ');
        out.push_str(tail);
    }
    out
}

/// Byte offset of the first `[` in `text` that is not inside a single- or
/// double-quoted literal.
fn first_unquoted_bracket(text: &str) -> Option<usize> {
    let mut in_single = false;
    let mut in_double = false;
    for (idx, ch) in text.char_indices() {
        match ch {
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            '[' if !in_single && !in_double => return Some(idx),
            _ => {}
        }
    }
    None
}

/// Given the text right after an opening `[`, returns the byte offset of the
/// matching `]`, honouring nested brackets and quoted literals.
fn matching_bracket_end(after_open: &str) -> Option<usize> {
    let mut depth = 1usize;
    let mut in_single = false;
    let mut in_double = false;
    for (idx, ch) in after_open.char_indices() {
        match ch {
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            '[' if !in_single && !in_double => depth += 1,
            ']' if !in_single && !in_double => {
                depth -= 1;
                if depth == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }
    None
}

/// Scans the text following the bracket for top-level clause keywords.
///
/// Returns `(has_where, tail_start)`: whether a `where` keyword occurs before
/// any trailing clause, and the byte offset of the first trailing clause
/// keyword (`having`, `order`, `limit`, `offset`), if any. Keywords are only
/// recognised as whole whitespace-delimited words outside quotes, parentheses
/// and brackets, so literals, sub-queries and identifiers such as `nowhere`
/// are ignored.
fn scan_rest_clauses(rest: &str) -> (bool, Option<usize>) {
    const TRAILING: [&str; 4] = ["having", "order", "limit", "offset"];

    let mut in_single = false;
    let mut in_double = false;
    let mut depth = 0usize;
    let mut has_where = false;
    // Start of the word being read, plus whether it was preceded by
    // whitespace (or the start of the text).
    let mut word: Option<(usize, bool)> = None;
    let mut prev: Option<char> = None;

    // A trailing sentinel space flushes a word that ends the text.
    let sentinel = std::iter::once((rest.len(), ' '));
    for (idx, ch) in rest.char_indices().chain(sentinel) {
        let top_level = !in_single && !in_double && depth == 0;
        if top_level && (ch.is_alphanumeric() || ch == '_') {
            if word.is_none() {
                word = Some((idx, prev.map_or(true, |p| p.is_whitespace())));
            }
        } else {
            if let Some((start, led_by_space)) = word.take() {
                if led_by_space && ch.is_whitespace() {
                    let token = &rest[start..idx];
                    if token.eq_ignore_ascii_case("where") {
                        has_where = true;
                    } else if TRAILING.iter().any(|kw| token.eq_ignore_ascii_case(kw)) {
                        return (has_where, Some(start));
                    }
                }
            }
            match ch {
                '\'' if !in_double => in_single = !in_single,
                '"' if !in_single => in_double = !in_double,
                '(' | '[' if !in_single && !in_double => depth += 1,
                ')' | ']' if !in_single && !in_double => depth = depth.saturating_sub(1),
                _ => {}
            }
        }
        prev = Some(ch);
    }

    (has_where, None)
}
123
124/// Parse a QAIL query (root entry point).
125/// Note: Does NOT strip comments. Use `parse()` for automatic comment handling.
126pub fn parse_root(input: &str) -> IResult<&str, Qail> {
127    let input = input.trim();
128
129    // Try transaction commands first (single keywords)
130    if let Ok((remaining, cmd)) = parse_txn_command(input) {
131        return Ok((remaining, cmd));
132    }
133
134    // Parse procedural/session commands that don't follow `action table ...`
135    if let Ok((remaining, cmd)) = parse_procedural_command(input) {
136        return Ok((remaining, cmd));
137    }
138
139    // Try CREATE INDEX first (special case: "index name on table ...")
140    if let Ok((remaining, cmd)) = parse_create_index(input) {
141        return Ok((remaining, cmd));
142    }
143
144    // Try WITH clause (CTE) parsing
145    let lower_input = input.to_lowercase();
146    let (input, ctes) = if lower_input.starts_with("with")
147        && lower_input
148            .chars()
149            .nth(4)
150            .map(|c| c.is_whitespace())
151            .unwrap_or(false)
152    {
153        let (remaining, (cte_defs, _is_recursive)) = cte::parse_with_clause(input)?;
154        let (remaining, _) = multispace0(remaining)?;
155        (remaining, cte_defs)
156    } else {
157        (input, vec![])
158    };
159
160    let (input, (action, distinct)) = parse_action(input)?;
161    // v2 syntax only: whitespace separator between action and table
162    let (input, _) = multispace1(input)?;
163
164    // Supports expressions like: CASE WHEN ... END, functions, columns
165    let (input, distinct_on) = if distinct {
166        // If already parsed "get distinct", check for "on (...)"
167        if let Ok((remaining, _)) = tag_no_case::<_, _, nom::error::Error<&str>>("on").parse(input)
168        {
169            let (remaining, _) = multispace0(remaining)?;
170            let (remaining, exprs) = nom::sequence::delimited(
171                nom::character::complete::char('('),
172                nom::multi::separated_list1(
173                    (
174                        multispace0,
175                        nom::character::complete::char(','),
176                        multispace0,
177                    ),
178                    expressions::parse_expression,
179                ),
180                nom::character::complete::char(')'),
181            )
182            .parse(remaining)?;
183            let (remaining, _) = multispace1(remaining)?;
184            (remaining, exprs)
185        } else {
186            (input, vec![])
187        }
188    } else {
189        (input, vec![])
190    };
191
192    //  Parse table name
193    let (input, table) = parse_identifier(input)?;
194    let (input, _) = multispace0(input)?;
195
196    // For MAKE (CREATE TABLE): parse column definitions
197    if matches!(action, Action::Make) {
198        return parse_create_table(input, table);
199    }
200
201    let (input, joins) = many0(parse_join_clause).parse(input)?;
202    let (input, _) = multispace0(input)?;
203
204    // For SET/UPDATE: parse "values col = val, col2 = val2" before fields
205    let (input, set_cages) = if matches!(action, Action::Set) {
206        opt(parse_values_clause).parse(input)?
207    } else {
208        (input, None)
209    };
210    let (input, _) = multispace0(input)?;
211
212    let (input, columns) = opt(parse_fields_clause).parse(input)?;
213    let (input, _) = multispace0(input)?;
214
215    // For ADD/INSERT: try "from (get ...)" first, then fall back to "values val1, val2"
216    let (input, source_query) = if matches!(action, Action::Add) {
217        opt(dml::parse_source_query).parse(input)?
218    } else {
219        (input, None)
220    };
221    let (input, _) = multispace0(input)?;
222
223    // Only parse values if no source_query (INSERT...SELECT takes precedence)
224    let (input, add_cages) = if source_query.is_none() && matches!(action, Action::Add) {
225        opt(dml::parse_insert_values).parse(input)?
226    } else {
227        (input, None)
228    };
229    let (input, _) = multispace0(input)?;
230
231    let (input, where_cages) = opt(parse_where_clause).parse(input)?;
232    let (input, _) = multispace0(input)?;
233
234    let (input, having) = opt(parse_having_clause).parse(input)?;
235    let (input, _) = multispace0(input)?;
236
237    let (input, on_conflict) = if matches!(action, Action::Add) {
238        opt(dml::parse_on_conflict).parse(input)?
239    } else {
240        (input, None)
241    };
242    let (input, _) = multispace0(input)?;
243
244    let (input, order_cages) = opt(parse_order_by_clause).parse(input)?;
245    let (input, _) = multispace0(input)?;
246    let (input, limit_cage) = opt(parse_limit_clause).parse(input)?;
247    let (input, _) = multispace0(input)?;
248    let (input, offset_cage) = opt(parse_offset_clause).parse(input)?;
249
250    let mut cages = Vec::new();
251
252    // For SET, values come first (as Payload cage)
253    if let Some(sc) = set_cages {
254        cages.push(sc);
255    }
256
257    // For ADD, values come as Payload cage too
258    if let Some(ac) = add_cages {
259        cages.push(ac);
260    }
261
262    if let Some(wc) = where_cages {
263        cages.extend(wc);
264    }
265    if let Some(oc) = order_cages {
266        cages.extend(oc);
267    }
268    if let Some(lc) = limit_cage {
269        cages.push(lc);
270    }
271    if let Some(oc) = offset_cage {
272        cages.push(oc);
273    }
274
275    Ok((
276        input,
277        Qail {
278            action,
279            table: table.to_string(),
280            columns: columns.unwrap_or_else(|| vec![Expr::Star]),
281            joins,
282            cages,
283            distinct,
284            distinct_on,
285            index_def: None,
286            table_constraints: vec![],
287            set_ops: vec![],
288            having: having.unwrap_or_default(),
289            group_by_mode: GroupByMode::default(),
290            returning: None,
291            ctes,
292            on_conflict,
293            source_query,
294            channel: None,
295            payload: None,
296            savepoint_name: None,
297            from_tables: vec![],
298            using_tables: vec![],
299            lock_mode: None,
300            skip_locked: false,
301            fetch: None,
302            default_values: false,
303            overriding: None,
304            sample: None,
305            only_table: false,
306            vector: None,
307            score_threshold: None,
308            vector_name: None,
309            with_vector: false,
310            vector_size: None,
311            distance: None,
312            on_disk: None,
313            function_def: None,
314            trigger_def: None,
315        },
316    ))
317}
318
/// Strip SQL comments from input (both `--` line comments and `/* */` block
/// comments).
///
/// * A line comment is removed up to the end of the line; the newline itself
///   is kept so line structure is preserved.
/// * A closed block comment is replaced by a single space so the tokens on
///   either side stay separated.
/// * An unclosed block comment is dropped, but its `/*` opener is re-emitted
///   so the parser still sees invalid input and reports an error.
/// * Comment markers inside single- or double-quoted literals are left
///   untouched, so a value such as `'a--b'` is not truncated.
fn strip_sql_comments(input: &str) -> String {
    let mut result = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();
    // The quote character of the literal currently being copied, if any.
    let mut open_quote: Option<char> = None;

    while let Some(c) = chars.next() {
        // Inside a quoted literal everything is copied verbatim. A doubled
        // quote (`''`) closes and immediately reopens the literal, which
        // yields the same result as treating it as an escape.
        if let Some(quote) = open_quote {
            result.push(c);
            if c == quote {
                open_quote = None;
            }
            continue;
        }

        match c {
            '\'' | '"' => {
                open_quote = Some(c);
                result.push(c);
            }
            '-' if chars.peek() == Some(&'-') => {
                // Line comment: skip until end of line
                chars.next(); // consume second -
                for nc in chars.by_ref() {
                    if nc == '\n' {
                        result.push('\n'); // preserve newline
                        break;
                    }
                }
            }
            '/' if chars.peek() == Some(&'*') => {
                // Block comment: skip until */
                chars.next(); // consume *
                let mut closed = false;
                while let Some(nc) = chars.next() {
                    if nc == '*' && chars.peek() == Some(&'/') {
                        chars.next(); // consume /
                        result.push(' '); // replace with space to preserve separation
                        closed = true;
                        break;
                    }
                }
                if !closed {
                    // Unclosed block comment: re-emit the opener so the
                    // parser rejects the query instead of silently accepting it.
                    result.push_str("/*");
                }
            }
            _ => result.push(c),
        }
    }

    result
}