Skip to main content

qail_core/parser/grammar/
mod.rs

1/// Base parsing utilities (identifiers, literals, whitespace).
2pub mod base;
3/// Binary operator parsing (AND, OR, arithmetic).
4pub mod binary_ops;
5/// CASE WHEN expression parsing.
6pub mod case_when;
7/// Clause parsing (WHERE, ORDER BY, LIMIT, etc.).
8pub mod clauses;
9/// Common Table Expression (WITH) parsing.
10pub mod cte;
11/// Data Definition Language parsing (CREATE TABLE, INDEX).
12pub mod ddl;
13/// Data Manipulation Language parsing (INSERT values, ON CONFLICT).
14pub mod dml;
15/// Expression parsing (columns, functions, sub-expressions).
16pub mod expressions;
17/// Function call parsing.
18pub mod functions;
19/// JOIN clause parsing.
20pub mod joins;
21/// PostgreSQL MERGE parsing.
22pub mod merge;
23/// Special function parsing (COALESCE, NULLIF, GREATEST, etc.).
24pub mod special_funcs;
25
26use self::base::*;
27use self::clauses::*;
28use self::ddl::*;
29use self::dml::*;
30use self::joins::*;
31use crate::ast::*;
32use nom::{
33    IResult, Parser,
34    bytes::complete::tag_no_case,
35    character::complete::{multispace0, multispace1},
36    combinator::opt,
37    multi::many0,
38};
39// use self::expressions::*; // Used in clauses module
40
41/// Parse a QAIL query with comment preprocessing.
42/// This is the recommended entry point - handles SQL comment stripping
43/// and `table[filter]` shorthand desugaring.
44pub fn parse(input: &str) -> Result<Qail, String> {
45    let cleaned = strip_sql_comments(input);
46    // Desugar table[filter] shorthand: "set users[active = true] fields ..."
47    // → "set users fields ... where active = true"
48    let desugared = desugar_bracket_filter(&cleaned);
49    match parse_root(&desugared) {
50        Ok(("", cmd)) => Ok(cmd),
51        Ok((remaining, _)) => Err(format!("Unexpected trailing content: '{}'", remaining)),
52        Err(e) => Err(format!("Parse error: {:?}", e)),
53    }
54}
55
/// Desugar `table[filter]` shorthand into an explicit `where` clause.
///
/// Transforms `action table[cond] rest` into `action table rest where cond`.
/// The condition is placed where the grammar expects it:
/// * if `rest` already has a top-level `where`, the condition is appended to it
///   with `AND`;
/// * if `rest` contains trailing clauses that are parsed after WHERE
///   (`having`, `order`, `limit`, `offset`), the condition is inserted *before*
///   the first of them rather than at the very end of the query.
///
/// The input is returned (trimmed) unchanged when the first `[` is not table
/// shorthand: there is no whitespace before it, it appears after a clause
/// keyword (`where`, `fields`, `having`, `order`, `limit`, `offset`, `join`),
/// it sits inside a quoted literal, or it has no matching `]`.
///
/// NOTE(review): when an existing `where` contains `or`, the appended
/// `AND cond` is not parenthesized — confirm the intended precedence.
fn desugar_bracket_filter(input: &str) -> String {
    let trimmed = input.trim();
    let Some(bracket_start) = trimmed.find('[') else {
        return trimmed.to_string();
    };

    // There must be at least "action table" before the bracket.
    let before_bracket = &trimmed[..bracket_start];
    if !before_bracket.contains(char::is_whitespace) {
        return trimmed.to_string();
    }

    // Guard: don't treat brackets in clauses/values as table shorthand.
    // Example to avoid: `... where tags && '["a","b"]'`
    // Whitespace is normalized to plain spaces so multi-line queries are
    // guarded the same way as single-line ones.
    let before_normalized: String = before_bracket
        .chars()
        .map(|c| {
            if c.is_whitespace() {
                ' '
            } else {
                c.to_ascii_lowercase()
            }
        })
        .collect();
    const CLAUSE_GUARDS: [&str; 7] = [
        " where ", " fields ", " having ", " order ", " limit ", " offset ", " join ",
    ];
    if CLAUSE_GUARDS
        .iter()
        .any(|kw| before_normalized.contains(kw))
    {
        return trimmed.to_string();
    }

    // Guard: a bracket that sits inside an open quoted literal is data,
    // e.g. `add logs values '[debug] started'`.
    let mut in_single_quote = false;
    let mut in_double_quote = false;
    for c in before_bracket.chars() {
        match c {
            '\'' if !in_double_quote => in_single_quote = !in_single_quote,
            '"' if !in_single_quote => in_double_quote = !in_double_quote,
            _ => {}
        }
    }
    if in_single_quote || in_double_quote {
        return trimmed.to_string();
    }

    // Find the matching closing bracket, respecting nesting and quotes.
    let after_bracket = &trimmed[bracket_start + 1..];
    let mut depth = 1;
    let mut bracket_end = None;
    for (i, c) in after_bracket.char_indices() {
        match c {
            '\'' if !in_double_quote => in_single_quote = !in_single_quote,
            '"' if !in_single_quote => in_double_quote = !in_double_quote,
            '[' if !in_single_quote && !in_double_quote => depth += 1,
            ']' if !in_single_quote && !in_double_quote => {
                depth -= 1;
                if depth == 0 {
                    bracket_end = Some(i);
                    break;
                }
            }
            _ => {}
        }
    }
    let Some(end_pos) = bracket_end else {
        return trimmed.to_string();
    };

    let filter = &after_bracket[..end_pos];
    let rest = after_bracket[end_pos + 1..].trim();

    // Split `rest` into the part that precedes WHERE in the grammar (`head`)
    // and the clauses that must follow it (`tail`).
    let tail_start = find_top_level_keyword(rest, &["having", "order", "limit", "offset"])
        .unwrap_or(rest.len());
    let (head, tail) = rest.split_at(tail_start);
    let head = head.trim_end();
    let has_where = find_top_level_keyword(head, &["where"]).is_some();

    let mut out = String::with_capacity(trimmed.len() + " where ".len());
    out.push_str(before_bracket);
    if !head.is_empty() {
        out.push(' ');
        out.push_str(head);
    }
    // Extend an existing WHERE with AND, otherwise start a new one.
    out.push_str(if has_where { " AND " } else { " where " });
    out.push_str(filter);
    if !tail.is_empty() {
        out.push(' ');
        out.push_str(tail);
    }
    out
}

/// Byte offset of the first whole-word, ASCII-case-insensitive occurrence of
/// any of `keywords` in `text` that lies outside quotes, parentheses and
/// brackets. Returns `None` when no keyword occurs at top level.
fn find_top_level_keyword(text: &str, keywords: &[&str]) -> Option<usize> {
    let mut depth = 0usize;
    let mut in_single_quote = false;
    let mut in_double_quote = false;
    let mut prev_is_word = false;

    for (i, c) in text.char_indices() {
        let is_word = c.is_alphanumeric() || c == '_';
        if in_single_quote {
            in_single_quote = c != '\'';
        } else if in_double_quote {
            in_double_quote = c != '"';
        } else {
            match c {
                '\'' => in_single_quote = true,
                '"' => in_double_quote = true,
                '(' | '[' => depth += 1,
                ')' | ']' => depth = depth.saturating_sub(1),
                // Start of a word at nesting depth 0: compare the whole word.
                _ if depth == 0 && is_word && !prev_is_word => {
                    let word_len = text[i..]
                        .find(|ch: char| !(ch.is_alphanumeric() || ch == '_'))
                        .unwrap_or(text.len() - i);
                    let word = &text[i..i + word_len];
                    if keywords.iter().any(|kw| word.eq_ignore_ascii_case(kw)) {
                        return Some(i);
                    }
                }
                _ => {}
            }
        }
        prev_is_word = is_word;
    }
    None
}
125
126/// Parse a QAIL query (root entry point).
127/// Note: Does NOT strip comments. Use `parse()` for automatic comment handling.
128pub fn parse_root(input: &str) -> IResult<&str, Qail> {
129    let input = input.trim();
130
131    // Try transaction commands first (single keywords)
132    if let Ok((remaining, cmd)) = parse_txn_command(input) {
133        return Ok((remaining, cmd));
134    }
135
136    // Parse procedural/session commands that don't follow `action table ...`
137    if let Ok((remaining, cmd)) = parse_procedural_command(input) {
138        return Ok((remaining, cmd));
139    }
140
141    // Try CREATE INDEX first (special case: "index name on table ...")
142    if let Ok((remaining, cmd)) = parse_create_index(input) {
143        return Ok((remaining, cmd));
144    }
145
146    // Try WITH clause (CTE) parsing
147    let lower_input = input.to_lowercase();
148    let (input, ctes) = if lower_input.starts_with("with")
149        && lower_input
150            .chars()
151            .nth(4)
152            .map(|c| c.is_whitespace())
153            .unwrap_or(false)
154    {
155        let (remaining, (cte_defs, _is_recursive)) = cte::parse_with_clause(input)?;
156        let (remaining, _) = multispace0(remaining)?;
157        (remaining, cte_defs)
158    } else {
159        (input, vec![])
160    };
161
162    let (input, (action, distinct)) = parse_action(input)?;
163    // v2 syntax only: whitespace separator between action and table
164    let (input, _) = multispace1(input)?;
165
166    // Supports expressions like: CASE WHEN ... END, functions, columns
167    let (input, distinct_on) = if distinct {
168        // If already parsed "get distinct", check for "on (...)"
169        if let Ok((remaining, _)) = tag_no_case::<_, _, nom::error::Error<&str>>("on").parse(input)
170        {
171            let (remaining, _) = multispace0(remaining)?;
172            let (remaining, exprs) = nom::sequence::delimited(
173                nom::character::complete::char('('),
174                nom::multi::separated_list1(
175                    (
176                        multispace0,
177                        nom::character::complete::char(','),
178                        multispace0,
179                    ),
180                    expressions::parse_expression,
181                ),
182                nom::character::complete::char(')'),
183            )
184            .parse(remaining)?;
185            let (remaining, _) = multispace1(remaining)?;
186            (remaining, exprs)
187        } else {
188            (input, vec![])
189        }
190    } else {
191        (input, vec![])
192    };
193
194    //  Parse table name
195    let (input, table) = parse_identifier(input)?;
196    let (input, _) = multispace0(input)?;
197
198    // For MAKE (CREATE TABLE): parse column definitions
199    if matches!(action, Action::Make) {
200        return parse_create_table(input, table);
201    }
202
203    if matches!(action, Action::Merge) {
204        return merge::parse_merge_after_target(input, table, ctes);
205    }
206
207    let (input, joins) = many0(parse_join_clause).parse(input)?;
208    let (input, _) = multispace0(input)?;
209
210    // For SET/UPDATE: parse "values col = val, col2 = val2" before fields
211    let (input, set_cages) = if matches!(action, Action::Set) {
212        opt(parse_values_clause).parse(input)?
213    } else {
214        (input, None)
215    };
216    let (input, _) = multispace0(input)?;
217
218    let (input, columns) = opt(parse_fields_clause).parse(input)?;
219    let (input, _) = multispace0(input)?;
220
221    // For ADD/INSERT: try "from (get ...)" first, then fall back to "values val1, val2"
222    let (input, source_query) = if matches!(action, Action::Add) {
223        opt(dml::parse_source_query).parse(input)?
224    } else {
225        (input, None)
226    };
227    let (input, _) = multispace0(input)?;
228
229    // Only parse values if no source_query (INSERT...SELECT takes precedence)
230    let (input, add_cages) = if source_query.is_none() && matches!(action, Action::Add) {
231        opt(dml::parse_insert_values).parse(input)?
232    } else {
233        (input, None)
234    };
235    let (input, _) = multispace0(input)?;
236
237    let (input, where_cages) = opt(parse_where_clause).parse(input)?;
238    let (input, _) = multispace0(input)?;
239
240    let (input, having) = opt(parse_having_clause).parse(input)?;
241    let (input, _) = multispace0(input)?;
242
243    let (input, on_conflict) = if matches!(action, Action::Add) {
244        opt(dml::parse_on_conflict).parse(input)?
245    } else {
246        (input, None)
247    };
248    let (input, _) = multispace0(input)?;
249
250    let (input, order_cages) = opt(parse_order_by_clause).parse(input)?;
251    let (input, _) = multispace0(input)?;
252    let (input, limit_cage) = opt(parse_limit_clause).parse(input)?;
253    let (input, _) = multispace0(input)?;
254    let (input, offset_cage) = opt(parse_offset_clause).parse(input)?;
255
256    let mut cages = Vec::new();
257
258    // For SET, values come first (as Payload cage)
259    if let Some(sc) = set_cages {
260        cages.push(sc);
261    }
262
263    // For ADD, values come as Payload cage too
264    if let Some(ac) = add_cages {
265        cages.push(ac);
266    }
267
268    if let Some(wc) = where_cages {
269        cages.extend(wc);
270    }
271    if let Some(oc) = order_cages {
272        cages.extend(oc);
273    }
274    if let Some(lc) = limit_cage {
275        cages.push(lc);
276    }
277    if let Some(oc) = offset_cage {
278        cages.push(oc);
279    }
280
281    Ok((
282        input,
283        Qail {
284            action,
285            table: table.to_string(),
286            columns: columns.unwrap_or_else(|| vec![Expr::Star]),
287            joins,
288            cages,
289            distinct,
290            distinct_on,
291            index_def: None,
292            table_constraints: vec![],
293            set_ops: vec![],
294            having: having.unwrap_or_default(),
295            group_by_mode: GroupByMode::default(),
296            returning: None,
297            ctes,
298            on_conflict,
299            merge: None,
300            source_query,
301            channel: None,
302            payload: None,
303            savepoint_name: None,
304            from_tables: vec![],
305            using_tables: vec![],
306            lock_mode: None,
307            skip_locked: false,
308            fetch: None,
309            default_values: false,
310            overriding: None,
311            sample: None,
312            only_table: false,
313            vector: None,
314            score_threshold: None,
315            vector_name: None,
316            with_vector: false,
317            vector_size: None,
318            distance: None,
319            on_disk: None,
320            function_def: None,
321            trigger_def: None,
322            policy_def: None,
323        },
324    ))
325}
326
327/// Strip SQL comments from input (both -- line comments and /* */ block comments)
328fn strip_sql_comments(input: &str) -> String {
329    let mut result = String::with_capacity(input.len());
330    let bytes = input.as_bytes();
331    let mut i = 0;
332    let mut in_single_quote = false;
333    let mut in_double_quote = false;
334    let mut raw_delimiter: Option<String> = None;
335
336    while i < input.len() {
337        if let Some(ref delimiter) = raw_delimiter {
338            if input[i..].starts_with(delimiter) {
339                result.push_str(delimiter);
340                i += delimiter.len();
341                raw_delimiter = None;
342            } else {
343                push_char_at(input, &mut result, &mut i);
344            }
345            continue;
346        }
347
348        if in_single_quote {
349            if bytes[i] == b'\'' {
350                result.push('\'');
351                i += 1;
352                if i < input.len() && bytes[i] == b'\'' {
353                    result.push('\'');
354                    i += 1;
355                } else {
356                    in_single_quote = false;
357                }
358            } else {
359                push_char_at(input, &mut result, &mut i);
360            }
361            continue;
362        }
363
364        if in_double_quote {
365            if bytes[i] == b'"' {
366                result.push('"');
367                i += 1;
368                if i < input.len() && bytes[i] == b'"' {
369                    result.push('"');
370                    i += 1;
371                } else {
372                    in_double_quote = false;
373                }
374            } else {
375                push_char_at(input, &mut result, &mut i);
376            }
377            continue;
378        }
379
380        if input[i..].starts_with("'''") || input[i..].starts_with("\"\"\"") {
381            let delimiter = &input[i..i + 3];
382            result.push_str(delimiter);
383            raw_delimiter = Some(delimiter.to_string());
384            i += 3;
385            continue;
386        }
387
388        if bytes[i] == b'\'' {
389            in_single_quote = true;
390            result.push('\'');
391            i += 1;
392            continue;
393        }
394
395        if bytes[i] == b'"' {
396            in_double_quote = true;
397            result.push('"');
398            i += 1;
399            continue;
400        }
401
402        if let Some(delimiter_len) = dollar_quote_delimiter_len(bytes, i) {
403            let delimiter = &input[i..i + delimiter_len];
404            result.push_str(delimiter);
405            raw_delimiter = Some(delimiter.to_string());
406            i += delimiter_len;
407            continue;
408        }
409
410        if bytes[i] == b'-' && i + 1 < input.len() && bytes[i + 1] == b'-' {
411            i += 2;
412            while i < input.len() {
413                let Some(ch) = input.get(i..).and_then(|s| s.chars().next()) else {
414                    break;
415                };
416                i += ch.len_utf8();
417                if ch == '\n' {
418                    result.push('\n');
419                    break;
420                }
421            }
422        } else if bytes[i] == b'/' && i + 1 < input.len() && bytes[i + 1] == b'*' {
423            i += 2;
424            let mut closed = false;
425            while i < input.len() {
426                if bytes[i] == b'*' && i + 1 < input.len() && bytes[i + 1] == b'/' {
427                    i += 2;
428                    result.push(' '); // replace with space to preserve separation
429                    closed = true;
430                    break;
431                }
432                advance_char(input, &mut i);
433            }
434            if !closed {
435                // Unclosed block comment — preserve raw text so parser reports error
436                result.push_str("/*");
437            }
438        } else {
439            push_char_at(input, &mut result, &mut i);
440        }
441    }
442
443    result
444}
445
/// Copy the character starting at byte offset `*index` of `input` into
/// `output` and advance `*index` past it.
///
/// When no character starts there (offset at/after the end, or not on a
/// character boundary), nothing is copied and `*index` is set to `input.len()`.
fn push_char_at(input: &str, output: &mut String, index: &mut usize) {
    match input.get(*index..).and_then(|tail| tail.chars().next()) {
        Some(ch) => {
            output.push(ch);
            *index += ch.len_utf8();
        }
        None => *index = input.len(),
    }
}
454
/// Advance `*index` past the character that starts at that byte offset of
/// `input`, without copying it anywhere.
///
/// When no character starts there (offset at/after the end, or not on a
/// character boundary), `*index` is set to `input.len()`.
fn advance_char(input: &str, index: &mut usize) {
    let start = *index;
    *index = match input.get(start..).and_then(|tail| tail.chars().next()) {
        Some(ch) => start + ch.len_utf8(),
        None => input.len(),
    };
}
462
/// Length in bytes of a dollar-quote delimiter starting at `bytes[start]`.
///
/// Recognizes `$$` (length 2) and `$tag$`, where `tag` starts with an ASCII
/// letter or `_` and continues with ASCII letters, digits or `_`. Returns
/// `None` when `bytes[start]` is not `$`, when the tag contains any other
/// byte, or when the closing `$` of the delimiter is missing.
fn dollar_quote_delimiter_len(bytes: &[u8], start: usize) -> Option<usize> {
    // Everything after the leading `$`.
    let body = match bytes.get(start..)?.split_first()? {
        (&b'$', body) => body,
        _ => return None,
    };

    match *body.first()? {
        // Empty tag: `$$`.
        b'$' => return Some(2),
        first if first.is_ascii_alphabetic() || first == b'_' => {}
        _ => return None,
    }

    // The tag must be terminated by `$` right after its last identifier byte.
    let tag_len = body
        .iter()
        .take_while(|byte| byte.is_ascii_alphanumeric() || **byte == b'_')
        .count();
    match body.get(tag_len) {
        Some(&b'$') => Some(tag_len + 2),
        _ => None,
    }
}