qail_core/parser/grammar/
mod.rs1pub mod base;
3pub mod binary_ops;
5pub mod case_when;
7pub mod clauses;
9pub mod cte;
11pub mod ddl;
13pub mod dml;
15pub mod expressions;
17pub mod functions;
19pub mod joins;
21pub mod merge;
23pub mod special_funcs;
25
26use self::base::*;
27use self::clauses::*;
28use self::ddl::*;
29use self::dml::*;
30use self::joins::*;
31use crate::ast::*;
32use nom::{
33 IResult, Parser,
34 bytes::complete::tag_no_case,
35 character::complete::{multispace0, multispace1},
36 combinator::opt,
37 multi::many0,
38};
39pub fn parse(input: &str) -> Result<Qail, String> {
45 let cleaned = strip_sql_comments(input);
46 let desugared = desugar_bracket_filter(&cleaned);
49 match parse_root(&desugared) {
50 Ok(("", cmd)) => Ok(cmd),
51 Ok((remaining, _)) => Err(format!("Unexpected trailing content: '{}'", remaining)),
52 Err(e) => Err(format!("Parse error: {:?}", e)),
53 }
54}
55
/// Rewrites the bracket-filter shorthand `<action> <table>[<filter>] <rest>`
/// into an explicit `where` clause.
///
/// The input is returned trimmed but otherwise unchanged when:
/// * it contains no `[`,
/// * the text before the first `[` has no space (nothing like `<action> <table>`),
/// * the text before the first `[` already contains a clause keyword, so the
///   bracket belongs to that clause rather than to the table, or
/// * the opening `[` is never closed.
///
/// Otherwise the filter is merged into the statement:
/// * if `<rest>` already has a top-level `where`, the filter is appended to it
///   with `AND`;
/// * if not, a new `where <filter>` clause is added.
///
/// In both cases the condition is placed *before* any trailing
/// `having` / `order` / `limit` / `offset` clause, because `parse_root` parses
/// the `where` clause ahead of those and would otherwise reject the statement
/// with trailing content.
///
/// NOTE(review): the filter is joined with a bare `AND`; if the existing
/// condition contains a top-level `or`, precedence may need confirming.
fn desugar_bracket_filter(input: &str) -> String {
    // Keywords before the bracket that mean `[` is not a table filter.
    const CLAUSE_MARKERS: [&str; 7] = [
        " where ", " fields ", " having ", " order ", " limit ", " offset ", " join ",
    ];
    // Clauses that the grammar expects after the `where` clause.
    const TRAILING_CLAUSES: [&str; 4] = ["having", "order", "limit", "offset"];

    let trimmed = input.trim();
    let Some(bracket_start) = trimmed.find('[') else {
        return trimmed.to_string();
    };

    let before_bracket = &trimmed[..bracket_start];
    if !before_bracket.contains(' ') {
        return trimmed.to_string();
    }

    let before_lower = before_bracket.to_ascii_lowercase();
    if CLAUSE_MARKERS
        .iter()
        .any(|marker| before_lower.contains(marker))
    {
        return trimmed.to_string();
    }

    let after_bracket = &trimmed[bracket_start + 1..];
    let Some(end_pos) = bracket_filter_end(after_bracket) else {
        return trimmed.to_string();
    };

    let filter = &after_bracket[..end_pos];
    let rest = after_bracket[end_pos + 1..].trim();

    // Locate an existing WHERE, then the first clause that has to follow it.
    // The second scan starts right after the WHERE keyword, which is a
    // top-level position, so the scanner's quote/nesting state is clean there.
    let where_pos = find_clause_keyword(rest, &["where"]);
    let scan_from = where_pos.map_or(0, |pos| pos + "where".len());
    let tail_pos =
        find_clause_keyword(&rest[scan_from..], &TRAILING_CLAUSES).map(|pos| pos + scan_from);

    let (head, tail) = match tail_pos {
        Some(pos) => (rest[..pos].trim_end(), &rest[pos..]),
        None => (rest, ""),
    };
    let connective = if where_pos.is_some() { "AND" } else { "where" };

    let mut out = String::with_capacity(trimmed.len() + 8);
    out.push_str(before_bracket);
    if !head.is_empty() {
        out.push(' ');
        out.push_str(head);
    }
    out.push(' ');
    out.push_str(connective);
    out.push(' ');
    out.push_str(filter);
    if !tail.is_empty() {
        out.push(' ');
        out.push_str(tail);
    }
    out
}

/// Returns the byte offset (within `after_open`) of the `]` that closes a
/// bracket opened just before `after_open`, honouring nested brackets and
/// ignoring brackets inside single- or double-quoted text.
fn bracket_filter_end(after_open: &str) -> Option<usize> {
    let mut depth = 1usize;
    let mut in_single_quote = false;
    let mut in_double_quote = false;

    for (i, c) in after_open.char_indices() {
        match c {
            '\'' if !in_double_quote => in_single_quote = !in_single_quote,
            '"' if !in_single_quote => in_double_quote = !in_double_quote,
            '[' if !in_single_quote && !in_double_quote => depth += 1,
            ']' if !in_single_quote && !in_double_quote => {
                // `depth` starts at 1 and we return as soon as it hits 0,
                // so this subtraction cannot underflow.
                depth -= 1;
                if depth == 0 {
                    return Some(i);
                }
            }
            _ => {}
        }
    }
    None
}

/// Finds the first of `keywords` (ASCII case-insensitive) in `text` that
/// starts a word, is followed by whitespace, and sits outside quotes,
/// parentheses and brackets. Returns its byte offset.
fn find_clause_keyword(text: &str, keywords: &[&str]) -> Option<usize> {
    let mut in_single_quote = false;
    let mut in_double_quote = false;
    let mut nesting = 0usize;
    let mut prev_is_word = false;

    for (i, c) in text.char_indices() {
        let at_word_start = !prev_is_word;
        prev_is_word = c.is_alphanumeric() || c == '_';

        match c {
            '\'' if !in_double_quote => in_single_quote = !in_single_quote,
            '"' if !in_single_quote => in_double_quote = !in_double_quote,
            _ if in_single_quote || in_double_quote => {}
            '(' | '[' => nesting += 1,
            ')' | ']' => nesting = nesting.saturating_sub(1),
            _ if nesting == 0 && at_word_start => {
                let found = keywords.iter().any(|kw| {
                    let end = i + kw.len();
                    // `get` rejects out-of-range and non-boundary slices, so
                    // `text[end..]` below is only evaluated on a valid split.
                    matches!(text.get(i..end), Some(word) if word.eq_ignore_ascii_case(kw))
                        && text[end..].starts_with(char::is_whitespace)
                });
                if found {
                    return Some(i);
                }
            }
            _ => {}
        }
    }
    None
}
125
126pub fn parse_root(input: &str) -> IResult<&str, Qail> {
129 let input = input.trim();
130
131 if let Ok((remaining, cmd)) = parse_txn_command(input) {
133 return Ok((remaining, cmd));
134 }
135
136 if let Ok((remaining, cmd)) = parse_procedural_command(input) {
138 return Ok((remaining, cmd));
139 }
140
141 if let Ok((remaining, cmd)) = parse_create_index(input) {
143 return Ok((remaining, cmd));
144 }
145
146 let lower_input = input.to_lowercase();
148 let (input, ctes) = if lower_input.starts_with("with")
149 && lower_input
150 .chars()
151 .nth(4)
152 .map(|c| c.is_whitespace())
153 .unwrap_or(false)
154 {
155 let (remaining, (cte_defs, _is_recursive)) = cte::parse_with_clause(input)?;
156 let (remaining, _) = multispace0(remaining)?;
157 (remaining, cte_defs)
158 } else {
159 (input, vec![])
160 };
161
162 let (input, (action, distinct)) = parse_action(input)?;
163 let (input, _) = multispace1(input)?;
165
166 let (input, distinct_on) = if distinct {
168 if let Ok((remaining, _)) = tag_no_case::<_, _, nom::error::Error<&str>>("on").parse(input)
170 {
171 let (remaining, _) = multispace0(remaining)?;
172 let (remaining, exprs) = nom::sequence::delimited(
173 nom::character::complete::char('('),
174 nom::multi::separated_list1(
175 (
176 multispace0,
177 nom::character::complete::char(','),
178 multispace0,
179 ),
180 expressions::parse_expression,
181 ),
182 nom::character::complete::char(')'),
183 )
184 .parse(remaining)?;
185 let (remaining, _) = multispace1(remaining)?;
186 (remaining, exprs)
187 } else {
188 (input, vec![])
189 }
190 } else {
191 (input, vec![])
192 };
193
194 let (input, table) = parse_identifier(input)?;
196 let (input, _) = multispace0(input)?;
197
198 if matches!(action, Action::Make) {
200 return parse_create_table(input, table);
201 }
202
203 if matches!(action, Action::Merge) {
204 return merge::parse_merge_after_target(input, table, ctes);
205 }
206
207 let (input, joins) = many0(parse_join_clause).parse(input)?;
208 let (input, _) = multispace0(input)?;
209
210 let (input, set_cages) = if matches!(action, Action::Set) {
212 opt(parse_values_clause).parse(input)?
213 } else {
214 (input, None)
215 };
216 let (input, _) = multispace0(input)?;
217
218 let (input, columns) = opt(parse_fields_clause).parse(input)?;
219 let (input, _) = multispace0(input)?;
220
221 let (input, source_query) = if matches!(action, Action::Add) {
223 opt(dml::parse_source_query).parse(input)?
224 } else {
225 (input, None)
226 };
227 let (input, _) = multispace0(input)?;
228
229 let (input, add_cages) = if source_query.is_none() && matches!(action, Action::Add) {
231 opt(dml::parse_insert_values).parse(input)?
232 } else {
233 (input, None)
234 };
235 let (input, _) = multispace0(input)?;
236
237 let (input, where_cages) = opt(parse_where_clause).parse(input)?;
238 let (input, _) = multispace0(input)?;
239
240 let (input, having) = opt(parse_having_clause).parse(input)?;
241 let (input, _) = multispace0(input)?;
242
243 let (input, on_conflict) = if matches!(action, Action::Add) {
244 opt(dml::parse_on_conflict).parse(input)?
245 } else {
246 (input, None)
247 };
248 let (input, _) = multispace0(input)?;
249
250 let (input, order_cages) = opt(parse_order_by_clause).parse(input)?;
251 let (input, _) = multispace0(input)?;
252 let (input, limit_cage) = opt(parse_limit_clause).parse(input)?;
253 let (input, _) = multispace0(input)?;
254 let (input, offset_cage) = opt(parse_offset_clause).parse(input)?;
255
256 let mut cages = Vec::new();
257
258 if let Some(sc) = set_cages {
260 cages.push(sc);
261 }
262
263 if let Some(ac) = add_cages {
265 cages.push(ac);
266 }
267
268 if let Some(wc) = where_cages {
269 cages.extend(wc);
270 }
271 if let Some(oc) = order_cages {
272 cages.extend(oc);
273 }
274 if let Some(lc) = limit_cage {
275 cages.push(lc);
276 }
277 if let Some(oc) = offset_cage {
278 cages.push(oc);
279 }
280
281 Ok((
282 input,
283 Qail {
284 action,
285 table: table.to_string(),
286 columns: columns.unwrap_or_else(|| vec![Expr::Star]),
287 joins,
288 cages,
289 distinct,
290 distinct_on,
291 index_def: None,
292 table_constraints: vec![],
293 set_ops: vec![],
294 having: having.unwrap_or_default(),
295 group_by_mode: GroupByMode::default(),
296 returning: None,
297 ctes,
298 on_conflict,
299 merge: None,
300 source_query,
301 channel: None,
302 payload: None,
303 savepoint_name: None,
304 from_tables: vec![],
305 using_tables: vec![],
306 lock_mode: None,
307 skip_locked: false,
308 fetch: None,
309 default_values: false,
310 overriding: None,
311 sample: None,
312 only_table: false,
313 vector: None,
314 score_threshold: None,
315 vector_name: None,
316 with_vector: false,
317 vector_size: None,
318 distance: None,
319 on_disk: None,
320 function_def: None,
321 trigger_def: None,
322 policy_def: None,
323 },
324 ))
325}
326
327fn strip_sql_comments(input: &str) -> String {
329 let mut result = String::with_capacity(input.len());
330 let bytes = input.as_bytes();
331 let mut i = 0;
332 let mut in_single_quote = false;
333 let mut in_double_quote = false;
334 let mut raw_delimiter: Option<String> = None;
335
336 while i < input.len() {
337 if let Some(ref delimiter) = raw_delimiter {
338 if input[i..].starts_with(delimiter) {
339 result.push_str(delimiter);
340 i += delimiter.len();
341 raw_delimiter = None;
342 } else {
343 push_char_at(input, &mut result, &mut i);
344 }
345 continue;
346 }
347
348 if in_single_quote {
349 if bytes[i] == b'\'' {
350 result.push('\'');
351 i += 1;
352 if i < input.len() && bytes[i] == b'\'' {
353 result.push('\'');
354 i += 1;
355 } else {
356 in_single_quote = false;
357 }
358 } else {
359 push_char_at(input, &mut result, &mut i);
360 }
361 continue;
362 }
363
364 if in_double_quote {
365 if bytes[i] == b'"' {
366 result.push('"');
367 i += 1;
368 if i < input.len() && bytes[i] == b'"' {
369 result.push('"');
370 i += 1;
371 } else {
372 in_double_quote = false;
373 }
374 } else {
375 push_char_at(input, &mut result, &mut i);
376 }
377 continue;
378 }
379
380 if input[i..].starts_with("'''") || input[i..].starts_with("\"\"\"") {
381 let delimiter = &input[i..i + 3];
382 result.push_str(delimiter);
383 raw_delimiter = Some(delimiter.to_string());
384 i += 3;
385 continue;
386 }
387
388 if bytes[i] == b'\'' {
389 in_single_quote = true;
390 result.push('\'');
391 i += 1;
392 continue;
393 }
394
395 if bytes[i] == b'"' {
396 in_double_quote = true;
397 result.push('"');
398 i += 1;
399 continue;
400 }
401
402 if let Some(delimiter_len) = dollar_quote_delimiter_len(bytes, i) {
403 let delimiter = &input[i..i + delimiter_len];
404 result.push_str(delimiter);
405 raw_delimiter = Some(delimiter.to_string());
406 i += delimiter_len;
407 continue;
408 }
409
410 if bytes[i] == b'-' && i + 1 < input.len() && bytes[i + 1] == b'-' {
411 i += 2;
412 while i < input.len() {
413 let Some(ch) = input.get(i..).and_then(|s| s.chars().next()) else {
414 break;
415 };
416 i += ch.len_utf8();
417 if ch == '\n' {
418 result.push('\n');
419 break;
420 }
421 }
422 } else if bytes[i] == b'/' && i + 1 < input.len() && bytes[i + 1] == b'*' {
423 i += 2;
424 let mut closed = false;
425 while i < input.len() {
426 if bytes[i] == b'*' && i + 1 < input.len() && bytes[i + 1] == b'/' {
427 i += 2;
428 result.push(' '); closed = true;
430 break;
431 }
432 advance_char(input, &mut i);
433 }
434 if !closed {
435 result.push_str("/*");
437 }
438 } else {
439 push_char_at(input, &mut result, &mut i);
440 }
441 }
442
443 result
444}
445
/// Appends the character that starts at byte offset `*index` of `input` to
/// `output` and advances `*index` past it.
///
/// If there is no character at that offset (end of input, offset out of
/// range, or offset not on a character boundary), nothing is appended and
/// `*index` is set to `input.len()`.
fn push_char_at(input: &str, output: &mut String, index: &mut usize) {
    let Some(ch) = input.get(*index..).and_then(|tail| tail.chars().next()) else {
        *index = input.len();
        return;
    };
    output.push(ch);
    *index += ch.len_utf8();
}
454
/// Advances `*index` past the character that starts at that byte offset of
/// `input`.
///
/// If there is no character at that offset (end of input, offset out of
/// range, or offset not on a character boundary), `*index` is set to
/// `input.len()`.
fn advance_char(input: &str, index: &mut usize) {
    let next_char = input.get(*index..).and_then(|tail| tail.chars().next());
    *index = match next_char {
        Some(ch) => *index + ch.len_utf8(),
        None => input.len(),
    };
}
462
/// Returns the byte length of a dollar-quote delimiter starting at
/// `bytes[start]`, or `None` if no delimiter starts there.
///
/// A delimiter is either `$$` (length 2) or `$tag$`, where `tag` begins with
/// an ASCII letter or `_` and continues with ASCII letters, digits or `_`.
/// The returned length includes both `$` characters.
fn dollar_quote_delimiter_len(bytes: &[u8], start: usize) -> Option<usize> {
    // Everything after the opening `$`.
    let after_dollar = match bytes.get(start..)?.split_first()? {
        (b'$', tail) => tail,
        _ => return None,
    };

    match *after_dollar.first()? {
        b'$' => return Some(2),
        first if first.is_ascii_alphabetic() || first == b'_' => {}
        _ => return None,
    }

    // The first tag byte is valid; scan the remainder for the closing `$`.
    for (offset, &byte) in after_dollar.iter().enumerate().skip(1) {
        if byte == b'$' {
            // Opening `$` + `offset` tag bytes + closing `$`.
            return Some(offset + 2);
        }
        if !(byte.is_ascii_alphanumeric() || byte == b'_') {
            return None;
        }
    }

    None
}