Skip to main content

qail_core/parser/grammar/
functions.rs

//! Function and aggregate parsing.
//!
//! Handles parsing of:
//! - Function calls: `name(arg1, arg2)`
//! - Aggregates: `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`
//! - `FILTER (WHERE ...)` clause for aggregates
//! - `COUNT(DISTINCT col)` syntax
//! - Window functions: `name(...) OVER (PARTITION BY ... ORDER BY ... ROWS/RANGE BETWEEN ...)`
8
9use super::base::{parse_identifier, parse_operator, parse_value};
10use super::expressions::parse_expression;
11use crate::ast::*;
12use nom::{
13    IResult, Parser,
14    branch::alt,
15    bytes::complete::{tag, tag_no_case},
16    character::complete::{char, multispace0, multispace1},
17    combinator::{map, opt, peek},
18    multi::{separated_list0, separated_list1},
19    sequence::preceded,
20};
21
22/// Parse function call or aggregate: name(arg1, arg2)
23pub fn parse_function_or_aggregate(input: &str) -> IResult<&str, Expr> {
24    // Identifier followed by (
25    let (input, name) = parse_identifier(input)?;
26    let (input, _) = multispace0(input)?;
27    let (input, _) = char('(').parse(input)?;
28    let (input, _) = multispace0(input)?;
29
30    let (input, has_distinct) = opt((tag_no_case("distinct"), multispace1)).parse(input)?;
31    let distinct = has_distinct.is_some();
32
33    let (input, args) =
34        separated_list0((multispace0, char(','), multispace0), parse_function_arg).parse(input)?;
35
36    let (input, _) = multispace0(input)?;
37    let (input, _) = char(')').parse(input)?;
38    let (input, _) = multispace0(input)?;
39
40    let (input, filter_clause) = opt(parse_filter_clause).parse(input)?;
41
42    let (input, _) = multispace0(input)?;
43    if let Ok((remaining, _)) = tag_no_case::<_, _, nom::error::Error<&str>>("over").parse(input) {
44        let (remaining, _) = multispace0(remaining)?;
45        let (remaining, _) = char('(').parse(remaining)?;
46        let (remaining, _) = multispace0(remaining)?;
47
48        let (remaining, partition) = opt(parse_partition_by).parse(remaining)?;
49        let partition = partition.unwrap_or_default();
50        let (remaining, _) = multispace0(remaining)?;
51
52        let (remaining, order) = opt(parse_window_order_by).parse(remaining)?;
53        let order = order.unwrap_or_default();
54        let (remaining, _) = multispace0(remaining)?;
55
56        // Parse optional window frame: ROWS/RANGE BETWEEN ... AND ...
57        let (remaining, frame) = opt(parse_window_frame).parse(remaining)?;
58        let (remaining, _) = multispace0(remaining)?;
59
60        // Close the OVER clause
61        let (remaining, _) = char(')').parse(remaining)?;
62        let (remaining, _) = multispace0(remaining)?;
63
64        // Optional alias for window function
65        let (remaining, alias) = opt(preceded(
66            (multispace0, tag_no_case("as"), multispace1),
67            parse_identifier,
68        ))
69        .parse(remaining)?;
70        let alias_str = alias
71            .map(|s| s.to_string())
72            .unwrap_or_else(|| name.to_string());
73
74        // args are already Vec<Expr> from parse_function_arg - use directly (native AST)
75        return Ok((
76            remaining,
77            Expr::Window {
78                name: alias_str,
79                func: name.to_string(),
80                params: args, // Pass Expr args directly for native AST
81                partition,
82                order,
83                frame,
84            },
85        ));
86    }
87
88    // Optional alias: AS alias_name or just alias_name (after space)
89    let (input, alias) = opt(preceded(
90        (multispace0, tag_no_case("as"), multispace1),
91        parse_identifier,
92    ))
93    .parse(input)?;
94    let alias = alias.map(|s| s.to_string());
95
96    let name_lower = name.to_lowercase();
97    match name_lower.as_str() {
98        "count" | "sum" | "avg" | "min" | "max" => {
99            // For aggregates, convert first arg to string representation
100            let col = args
101                .first()
102                .map(|e| e.to_string())
103                .unwrap_or_else(|| "*".to_string());
104            let func = match name_lower.as_str() {
105                "count" => AggregateFunc::Count,
106                "sum" => AggregateFunc::Sum,
107                "avg" => AggregateFunc::Avg,
108                "min" => AggregateFunc::Min,
109                "max" => AggregateFunc::Max,
110                _ => AggregateFunc::Count, // unreachable
111            };
112            Ok((
113                input,
114                Expr::Aggregate {
115                    col,
116                    func,
117                    distinct,
118                    filter: filter_clause,
119                    alias,
120                },
121            ))
122        }
123        _ => Ok((
124            input,
125            Expr::FunctionCall {
126                name: name.to_string(),
127                args,
128                alias,
129            },
130        )),
131    }
132}
133
134/// Parse a single function argument (supports expressions or star)
135pub fn parse_function_arg(input: &str) -> IResult<&str, Expr> {
136    alt((map(tag("*"), |_| Expr::Star), parse_expression)).parse(input)
137}
138
139/// Parse FILTER (WHERE condition) clause for aggregates
140fn parse_filter_clause(input: &str) -> IResult<&str, Vec<Condition>> {
141    let (input, _) = tag_no_case("filter").parse(input)?;
142    let (input, _) = multispace0(input)?;
143    let (input, _) = char('(').parse(input)?;
144    let (input, _) = multispace0(input)?;
145    let (input, _) = tag_no_case("where").parse(input)?;
146    let (input, _) = multispace1(input)?;
147
148    let (input, conditions) = parse_filter_conditions(input)?;
149
150    let (input, _) = multispace0(input)?;
151    let (input, _) = char(')').parse(input)?;
152
153    Ok((input, conditions))
154}
155
156/// Parse conditions inside FILTER clause
157fn parse_filter_conditions(input: &str) -> IResult<&str, Vec<Condition>> {
158    let mut conditions = Vec::new();
159    let mut current_input = input;
160
161    loop {
162        let (input, _) = multispace0(current_input)?;
163        let (input, col) = parse_identifier(input)?;
164        let (input, _) = multispace0(input)?;
165        let (input, op) = parse_operator(input)?;
166        let (input, _) = multispace0(input)?;
167
168        // For IS NULL / IS NOT NULL, no value needed
169        let (input, value) = if matches!(op, Operator::IsNull | Operator::IsNotNull) {
170            (input, Value::Null)
171        } else if matches!(op, Operator::In | Operator::NotIn) {
172            let (input, _) = multispace0(input)?;
173            if let Ok((input, _)) = char::<_, nom::error::Error<&str>>('(').parse(input) {
174                let (input, _) = multispace0(input)?;
175                let (input, values) =
176                    separated_list0((multispace0, char(','), multispace0), parse_value)
177                        .parse(input)?;
178                let (input, _) = multispace0(input)?;
179                let (input, _) = char(')').parse(input)?;
180                if values.is_empty() {
181                    return Err(nom::Err::Error(nom::error::Error::new(
182                        input,
183                        nom::error::ErrorKind::SeparatedList,
184                    )));
185                }
186                (input, Value::Array(values))
187            } else {
188                parse_value(input)?
189            }
190        } else if matches!(op, Operator::Between | Operator::NotBetween) {
191            let (input, min_val) = parse_value(input)?;
192            let (input, _) = multispace1(input)?;
193            let (input, _) = tag_no_case("and").parse(input)?;
194            let (input, _) = multispace1(input)?;
195            let (input, max_val) = parse_value(input)?;
196            // Store as array with 2 elements [min, max]
197            (input, Value::Array(vec![min_val, max_val]))
198        } else {
199            // Try parsing as expression first (for now() - 24h type syntax)
200            parse_filter_value(input)?
201        };
202
203        conditions.push(Condition {
204            left: Expr::Named(col.to_string()),
205            op,
206            value,
207            is_array_unnest: false,
208        });
209
210        current_input = input;
211
212        let and_result: IResult<&str, _> = preceded(
213            (multispace0, tag_no_case("and"), multispace1),
214            peek(parse_identifier),
215        )
216        .parse(current_input);
217
218        if let Ok((_next_input, _)) = and_result {
219            // Skip the AND keyword and trailing whitespace
220            let (next_input, _) = multispace0(current_input)?;
221            let (next_input, _) = tag_no_case("and").parse(next_input)?;
222            let (next_input, _) = multispace1(next_input)?;
223            current_input = next_input;
224        } else {
225            break;
226        }
227    }
228
229    Ok((current_input, conditions))
230}
231
232/// Parse a value in FILTER condition that can be either a simple value or an expression
233/// like `now() - 24h`. Converts expressions to Value::Function with SQL representation.
234fn parse_filter_value(input: &str) -> IResult<&str, Value> {
235    // First try simple value (but NOT interval - we want full expression parsing for that)
236    // Try parsing just string, int, float, bool, null, params first
237    if let Ok((remaining, val)) = parse_value(input) {
238        // If it's an interval, we still want the proper SQL format
239        if let Value::Interval { amount, unit } = val {
240            return Ok((
241                remaining,
242                Value::Function(format!("INTERVAL '{} {}'", amount, unit)),
243            ));
244        }
245
246        // If it's a function or other complex value, we need expression parsing
247        if !matches!(val, Value::Function(_)) {
248            return Ok((remaining, val));
249        }
250    }
251
252    // Try parsing as a full expression (handles now() - 24h correctly)
253    // We parse as expression and then convert to SQL string representation
254    // Need to parse until we hit a boundary (AND, ))
255    let mut end_pos = 0;
256    let mut paren_depth = 0;
257
258    for (i, c) in input.char_indices() {
259        match c {
260            '(' => paren_depth += 1,
261            ')' => {
262                if paren_depth == 0 {
263                    end_pos = i;
264                    break;
265                }
266                paren_depth -= 1;
267            }
268            _ => {}
269        }
270
271        if paren_depth == 0 && i > 0 {
272            let remaining = &input[i..];
273            if remaining.len() >= 4 {
274                let potential_and = &remaining[..4].to_lowercase();
275                if potential_and.starts_with("and ")
276                    || potential_and.starts_with("and\t")
277                    || potential_and.starts_with("and\n")
278                {
279                    end_pos = i;
280                    break;
281                }
282            }
283        }
284    }
285
286    if end_pos == 0 {
287        end_pos = input.len();
288    }
289
290    let expr_str = input[..end_pos].trim();
291    if expr_str.is_empty() {
292        return Err(nom::Err::Error(nom::error::Error::new(
293            input,
294            nom::error::ErrorKind::TakeWhile1,
295        )));
296    }
297
298    // Try to parse as expression and get proper SQL
299    if let Ok((_, expr)) = parse_expression(expr_str) {
300        return Ok((&input[end_pos..], Value::Function(expr.to_string())));
301    }
302
303    Ok((&input[end_pos..], Value::Function(expr_str.to_string())))
304}
305
306/// Parse PARTITION BY col1, col2 clause for window functions
307fn parse_partition_by(input: &str) -> IResult<&str, Vec<String>> {
308    let (input, _) = tag_no_case("partition").parse(input)?;
309    let (input, _) = multispace1(input)?;
310    let (input, _) = tag_no_case("by").parse(input)?;
311    let (input, _) = multispace1(input)?;
312
313    let (input, cols) =
314        separated_list1((multispace0, char(','), multispace0), parse_identifier).parse(input)?;
315
316    Ok((input, cols.into_iter().map(|s| s.to_string()).collect()))
317}
318
319/// Parse ORDER BY col1 [asc|desc], col2 clause for window functions
320fn parse_window_order_by(input: &str) -> IResult<&str, Vec<Cage>> {
321    let (input, _) = tag_no_case("order").parse(input)?;
322    let (input, _) = multispace1(input)?;
323    let (input, _) = tag_no_case("by").parse(input)?;
324    let (input, _) = multispace1(input)?;
325
326    let (input, order_parts) = separated_list1(
327        (multispace0, char(','), multispace0),
328        parse_window_sort_item,
329    )
330    .parse(input)?;
331
332    Ok((input, order_parts))
333}
334
335/// Parse a single order by item: col [asc|desc]
336fn parse_window_sort_item(input: &str) -> IResult<&str, Cage> {
337    use nom::combinator::value;
338
339    let (input, col) = parse_identifier(input)?;
340    let (input, _) = multispace0(input)?;
341
342    let (input, order) = opt(alt((
343        value(SortOrder::Desc, tag_no_case("desc")),
344        value(SortOrder::Asc, tag_no_case("asc")),
345    )))
346    .parse(input)?;
347
348    Ok((
349        input,
350        Cage {
351            kind: CageKind::Sort(order.unwrap_or(SortOrder::Asc)),
352            conditions: vec![Condition {
353                left: Expr::Named(col.to_string()),
354                op: Operator::Eq,
355                value: Value::Null,
356                is_array_unnest: false,
357            }],
358            logical_op: LogicalOp::And,
359        },
360    ))
361}
362
363/// Parse window frame: ROWS/RANGE BETWEEN start AND end
364fn parse_window_frame(input: &str) -> IResult<&str, WindowFrame> {
365    use nom::combinator::value;
366
367    // Parse ROWS or RANGE
368    let (input, is_rows) = alt((
369        value(true, tag_no_case("rows")),
370        value(false, tag_no_case("range")),
371    ))
372    .parse(input)?;
373    let (input, _) = multispace1(input)?;
374    let (input, _) = tag_no_case("between").parse(input)?;
375    let (input, _) = multispace1(input)?;
376
377    // Parse start bound
378    let (input, start) = parse_frame_bound(input)?;
379    let (input, _) = multispace1(input)?;
380    let (input, _) = tag_no_case("and").parse(input)?;
381    let (input, _) = multispace1(input)?;
382
383    // Parse end bound
384    let (input, end) = parse_frame_bound(input)?;
385
386    let frame = if is_rows {
387        WindowFrame::Rows { start, end }
388    } else {
389        WindowFrame::Range { start, end }
390    };
391
392    Ok((input, frame))
393}
394
395/// Parse frame bound: UNBOUNDED PRECEDING, N PRECEDING, CURRENT ROW, N FOLLOWING, UNBOUNDED FOLLOWING
396fn parse_frame_bound(input: &str) -> IResult<&str, FrameBound> {
397    use nom::character::complete::i32 as parse_i32;
398    use nom::combinator::value;
399
400    alt((
401        // UNBOUNDED PRECEDING
402        value(
403            FrameBound::UnboundedPreceding,
404            (
405                tag_no_case("unbounded"),
406                multispace1,
407                tag_no_case("preceding"),
408            ),
409        ),
410        // UNBOUNDED FOLLOWING
411        value(
412            FrameBound::UnboundedFollowing,
413            (
414                tag_no_case("unbounded"),
415                multispace1,
416                tag_no_case("following"),
417            ),
418        ),
419        // CURRENT ROW
420        value(
421            FrameBound::CurrentRow,
422            (tag_no_case("current"), multispace1, tag_no_case("row")),
423        ),
424        // N PRECEDING
425        map(
426            (parse_i32, multispace1, tag_no_case("preceding")),
427            |(n, _, _)| FrameBound::Preceding(n),
428        ),
429        // N FOLLOWING
430        map(
431            (parse_i32, multispace1, tag_no_case("following")),
432            |(n, _, _)| FrameBound::Following(n),
433        ),
434    ))
435    .parse(input)
436}