qail_core/parser/grammar/
mod.rs

1pub mod base;
2pub mod clauses;
3pub mod cte;
4pub mod ddl;
5pub mod dml;
6pub mod joins;
7pub mod binary_ops;
8pub mod functions;
9pub mod case_when;
10pub mod special_funcs;
11pub mod expressions;
12
13use nom::{
14    bytes::complete::tag_no_case,
15    combinator::{opt},
16    multi::many0,
17    character::complete::{multispace0, multispace1},
18    Parser,
19    IResult,
20};
21use crate::ast::*;
22// Import parsers from submodules
23use self::base::*;
24use self::clauses::*;
25use self::ddl::*;
26use self::dml::*;
27use self::joins::*;
28// use self::expressions::*; // Used in clauses module
29
30/// Parse a QAIL query with comment preprocessing.
31/// This is the recommended entry point - handles SQL comment stripping.
32pub fn parse(input: &str) -> Result<QailCmd, String> {
33    let cleaned = strip_sql_comments(input);
34    match parse_root(&cleaned) {
35        Ok((_, cmd)) => Ok(cmd),
36        Err(e) => Err(format!("Parse error: {:?}", e)),
37    }
38}
39
40/// Parse a QAIL query (root entry point).
41/// Note: Does NOT strip comments. Use `parse()` for automatic comment handling.
42pub fn parse_root(input: &str) -> IResult<&str, QailCmd> {
43    let input = input.trim();
44    
45    // Try transaction commands first (single keywords)
46    if let Ok((remaining, cmd)) = parse_txn_command(input) {
47        return Ok((remaining, cmd));
48    }
49    
50    // Try CREATE INDEX first (special case: "index name on table ...")
51    if let Ok((remaining, cmd)) = parse_create_index(input) {
52        return Ok((remaining, cmd));
53    }
54    
55    // Try WITH clause (CTE) parsing
56    // Check for 'with' followed by any whitespace (space, newline, tab)
57    let lower_input = input.to_lowercase();
58    let (input, ctes) = if lower_input.starts_with("with") && 
59        lower_input.chars().nth(4).map(|c| c.is_whitespace()).unwrap_or(false) {
60        let (remaining, (cte_defs, _is_recursive)) = cte::parse_with_clause(input)?;
61        let (remaining, _) = multispace0(remaining)?;
62        (remaining, cte_defs)
63    } else {
64        (input, vec![])
65    };
66    
67    // Parse action first
68    let (input, (action, distinct)) = parse_action(input)?;
69    let (input, _) = multispace1(input)?;
70    
71    // Check for DISTINCT ON (expr1, expr2) after action (Postgres-specific)
72    // Supports expressions like: CASE WHEN ... END, functions, columns
73    let (input, distinct_on) = if distinct {
74        // If already parsed "get distinct", check for "on (...)"
75        if let Ok((remaining, _)) = tag_no_case::<_, _, nom::error::Error<&str>>("on").parse(input) {
76            let (remaining, _) = multispace0(remaining)?;
77            // Parse (expr1, expr2) - full expressions, not just identifiers
78            let (remaining, exprs) = nom::sequence::delimited(
79                nom::character::complete::char('('),
80                nom::multi::separated_list1(
81                    (multispace0, nom::character::complete::char(','), multispace0),
82                    expressions::parse_expression
83                ),
84                nom::character::complete::char(')')
85            ).parse(remaining)?;
86            let (remaining, _) = multispace1(remaining)?;
87            (remaining, exprs)
88        } else {
89            (input, vec![])
90        }
91    } else {
92        (input, vec![])
93    };
94    
95    //  Parse table name
96    let (input, table) = parse_identifier(input)?;
97    let (input, _) = multispace0(input)?;
98    
99    // For MAKE (CREATE TABLE): parse column definitions
100    if matches!(action, Action::Make) {
101        return parse_create_table(input, table);
102    }
103    
104    // Parse optional joins: [inner|left|right] join table [on condition]
105    let (input, joins) = many0(parse_join_clause).parse(input)?;
106    let (input, _) = multispace0(input)?;
107    
108    // For SET/UPDATE: parse "values col = val, col2 = val2" before fields
109    let (input, set_cages) = if matches!(action, Action::Set) {
110        opt(parse_values_clause).parse(input)?
111    } else {
112        (input, None)
113    };
114    let (input, _) = multispace0(input)?;
115    
116    // Parse optional clauses
117    let (input, columns) = opt(parse_fields_clause).parse(input)?;
118    let (input, _) = multispace0(input)?;
119    
120    // For ADD/INSERT: try "from (get ...)" first, then fall back to "values val1, val2"
121    let (input, source_query) = if matches!(action, Action::Add) {
122        opt(dml::parse_source_query).parse(input)?
123    } else {
124        (input, None)
125    };
126    let (input, _) = multispace0(input)?;
127    
128    // Only parse values if no source_query (INSERT...SELECT takes precedence)
129    let (input, add_cages) = if source_query.is_none() && matches!(action, Action::Add) {
130        opt(dml::parse_insert_values).parse(input)?
131    } else {
132        (input, None)
133    };
134    let (input, _) = multispace0(input)?;
135    
136    let (input, where_cages) = opt(parse_where_clause).parse(input)?;
137    let (input, _) = multispace0(input)?;
138    
139    // Parse HAVING clause (for filtering on aggregates - comes after implicit GROUP BY)
140    let (input, having) = opt(parse_having_clause).parse(input)?;
141    let (input, _) = multispace0(input)?;
142    
143    // Parse ON CONFLICT clause (for ADD/INSERT only)
144    let (input, on_conflict) = if matches!(action, Action::Add) {
145        opt(dml::parse_on_conflict).parse(input)?
146    } else {
147        (input, None)
148    };
149    let (input, _) = multispace0(input)?;
150    
151    let (input, order_cages) = opt(parse_order_by_clause).parse(input)?;
152    let (input, _) = multispace0(input)?;
153    let (input, limit_cage) = opt(parse_limit_clause).parse(input)?;
154    let (input, _) = multispace0(input)?;
155    let (input, offset_cage) = opt(parse_offset_clause).parse(input)?;
156    
157    // Build cages
158    let mut cages = Vec::new();
159    
160    // For SET, values come first (as Payload cage)
161    if let Some(sc) = set_cages {
162        cages.push(sc);
163    }
164    
165    // For ADD, values come as Payload cage too
166    if let Some(ac) = add_cages {
167        cages.push(ac);
168    }
169    
170    if let Some(wc) = where_cages {
171        cages.extend(wc);
172    }
173    if let Some(oc) = order_cages {
174        cages.extend(oc);
175    }
176    if let Some(lc) = limit_cage {
177        cages.push(lc);
178    }
179    if let Some(oc) = offset_cage {
180        cages.push(oc);
181    }
182    
183    Ok((input, QailCmd {
184        action,
185        table: table.to_string(),
186        columns: columns.unwrap_or_else(|| vec![Expr::Star]),
187        joins,
188        cages,
189        distinct,
190        distinct_on,
191        index_def: None,
192        table_constraints: vec![],
193        set_ops: vec![],
194        having: having.unwrap_or_default(),
195        group_by_mode: GroupByMode::default(),
196        returning: None,
197        ctes,
198        on_conflict,
199        source_query,
200    }))
201}
202
/// Strip SQL comments from input (both `--` line comments and `/* */` block comments).
///
/// * A line comment is removed up to the end of the line; the terminating
///   newline itself is kept.
/// * A terminated block comment is replaced by a single space so the tokens
///   on either side stay separated. An unterminated block comment swallows
///   the rest of the input.
/// * Text inside single- or double-quoted literals is copied verbatim, so
///   comment markers that appear inside a string value (e.g. `'a--b'`) are
///   not treated as comments. A literal ends at the next occurrence of its
///   opening quote character; an unterminated literal runs to the end of
///   the input.
///   NOTE(review): assumes the grammar uses `'` and `"` as string
///   delimiters without backslash escapes — confirm against the value parser.
fn strip_sql_comments(input: &str) -> String {
    let mut result = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(c) = chars.next() {
        match c {
            // Quoted literal: copy through the matching closing quote.
            '\'' | '"' => {
                result.push(c);
                for sc in chars.by_ref() {
                    result.push(sc);
                    if sc == c {
                        break;
                    }
                }
            }
            // Line comment: skip until end of line.
            '-' if chars.peek() == Some(&'-') => {
                chars.next(); // consume second -
                for nc in chars.by_ref() {
                    if nc == '\n' {
                        result.push('\n'); // preserve newline
                        break;
                    }
                }
            }
            // Block comment: skip until */
            '/' if chars.peek() == Some(&'*') => {
                chars.next(); // consume *
                while let Some(nc) = chars.next() {
                    if nc == '*' && chars.peek() == Some(&'/') {
                        chars.next(); // consume /
                        result.push(' '); // replace with space to preserve separation
                        break;
                    }
                }
            }
            _ => result.push(c),
        }
    }

    result
}