qail_core/parser/grammar/
functions.rs

//! Function, aggregate, and window-function parsing.
//!
//! Handles parsing of:
//! - Function calls: name(arg1, arg2)
//! - Aggregates: COUNT, SUM, AVG, MIN, MAX
//! - FILTER (WHERE ...) clause for aggregates
//! - COUNT(DISTINCT col) syntax
//! - Window functions: name(...) OVER (PARTITION BY ... ORDER BY ... ROWS|RANGE BETWEEN ... AND ...)
8
9use super::base::{parse_identifier, parse_operator, parse_value};
10use super::expressions::parse_expression;
11use crate::ast::*;
12use nom::{
13    IResult, Parser,
14    branch::alt,
15    bytes::complete::{tag, tag_no_case},
16    character::complete::{char, multispace0, multispace1},
17    combinator::{map, opt, peek},
18    multi::separated_list0,
19    sequence::preceded,
20};
21
22/// Parse function call or aggregate: name(arg1, arg2)
23pub fn parse_function_or_aggregate(input: &str) -> IResult<&str, Expr> {
24    // Identifier followed by (
25    let (input, name) = parse_identifier(input)?;
26    let (input, _) = multispace0(input)?;
27    let (input, _) = char('(').parse(input)?;
28    let (input, _) = multispace0(input)?;
29
30    let (input, has_distinct) = opt((tag_no_case("distinct"), multispace1)).parse(input)?;
31    let distinct = has_distinct.is_some();
32
33    let (input, args) =
34        separated_list0((multispace0, char(','), multispace0), parse_function_arg).parse(input)?;
35
36    let (input, _) = multispace0(input)?;
37    let (input, _) = char(')').parse(input)?;
38    let (input, _) = multispace0(input)?;
39
40    let (input, filter_clause) = opt(parse_filter_clause).parse(input)?;
41
42    let (input, _) = multispace0(input)?;
43    if let Ok((remaining, _)) = tag_no_case::<_, _, nom::error::Error<&str>>("over").parse(input) {
44        let (remaining, _) = multispace0(remaining)?;
45        let (remaining, _) = char('(').parse(remaining)?;
46        let (remaining, _) = multispace0(remaining)?;
47
48        let (remaining, partition) = opt(parse_partition_by).parse(remaining)?;
49        let partition = partition.unwrap_or_default();
50        let (remaining, _) = multispace0(remaining)?;
51
52        let (remaining, order) = opt(parse_window_order_by).parse(remaining)?;
53        let order = order.unwrap_or_default();
54        let (remaining, _) = multispace0(remaining)?;
55
56        // Parse optional window frame: ROWS/RANGE BETWEEN ... AND ...
57        let (remaining, frame) = opt(parse_window_frame).parse(remaining)?;
58        let (remaining, _) = multispace0(remaining)?;
59
60        // Close the OVER clause
61        let (remaining, _) = char(')').parse(remaining)?;
62        let (remaining, _) = multispace0(remaining)?;
63
64        // Optional alias for window function
65        let (remaining, alias) = opt(preceded(
66            (multispace0, tag_no_case("as"), multispace1),
67            parse_identifier,
68        ))
69        .parse(remaining)?;
70        let alias_str = alias
71            .map(|s| s.to_string())
72            .unwrap_or_else(|| name.to_string());
73
74        // args are already Vec<Expr> from parse_function_arg - use directly (native AST)
75        return Ok((
76            remaining,
77            Expr::Window {
78                name: alias_str,
79                func: name.to_string(),
80                params: args,  // Pass Expr args directly for native AST
81                partition,
82                order,
83                frame,
84            },
85        ));
86    }
87
88    // Optional alias: AS alias_name or just alias_name (after space)
89    let (input, alias) = opt(preceded(
90        (multispace0, tag_no_case("as"), multispace1),
91        parse_identifier,
92    ))
93    .parse(input)?;
94    let alias = alias.map(|s| s.to_string());
95
96    let name_lower = name.to_lowercase();
97    match name_lower.as_str() {
98        "count" | "sum" | "avg" | "min" | "max" => {
99            // For aggregates, convert first arg to string representation
100            let col = args
101                .first()
102                .map(|e| e.to_string())
103                .unwrap_or_else(|| "*".to_string());
104            let func = match name_lower.as_str() {
105                "count" => AggregateFunc::Count,
106                "sum" => AggregateFunc::Sum,
107                "avg" => AggregateFunc::Avg,
108                "min" => AggregateFunc::Min,
109                "max" => AggregateFunc::Max,
110                _ => AggregateFunc::Count, // unreachable
111            };
112            Ok((
113                input,
114                Expr::Aggregate {
115                    col,
116                    func,
117                    distinct,
118                    filter: filter_clause,
119                    alias,
120                },
121            ))
122        }
123        _ => Ok((
124            input,
125            Expr::FunctionCall {
126                name: name.to_string(),
127                args,
128                alias,
129            },
130        )),
131    }
132}
133
134/// Parse a single function argument (supports expressions or star)
135pub fn parse_function_arg(input: &str) -> IResult<&str, Expr> {
136    alt((map(tag("*"), |_| Expr::Star), parse_expression)).parse(input)
137}
138
139/// Parse FILTER (WHERE condition) clause for aggregates
140fn parse_filter_clause(input: &str) -> IResult<&str, Vec<Condition>> {
141    let (input, _) = tag_no_case("filter").parse(input)?;
142    let (input, _) = multispace0(input)?;
143    let (input, _) = char('(').parse(input)?;
144    let (input, _) = multispace0(input)?;
145    let (input, _) = tag_no_case("where").parse(input)?;
146    let (input, _) = multispace1(input)?;
147
148    let (input, conditions) = parse_filter_conditions(input)?;
149
150    let (input, _) = multispace0(input)?;
151    let (input, _) = char(')').parse(input)?;
152
153    Ok((input, conditions))
154}
155
156/// Parse conditions inside FILTER clause
157fn parse_filter_conditions(input: &str) -> IResult<&str, Vec<Condition>> {
158    let mut conditions = Vec::new();
159    let mut current_input = input;
160
161    loop {
162        let (input, _) = multispace0(current_input)?;
163        let (input, col) = parse_identifier(input)?;
164        let (input, _) = multispace0(input)?;
165        let (input, op) = parse_operator(input)?;
166        let (input, _) = multispace0(input)?;
167
168        // For IS NULL / IS NOT NULL, no value needed
169        let (input, value) = if matches!(op, Operator::IsNull | Operator::IsNotNull) {
170            (input, Value::Null)
171        } else if matches!(op, Operator::In | Operator::NotIn) {
172            let (input, _) = char('(').parse(input)?;
173            let (input, _) = multispace0(input)?;
174            let (input, values) =
175                separated_list0((multispace0, char(','), multispace0), parse_value).parse(input)?;
176            let (input, _) = multispace0(input)?;
177            let (input, _) = char(')').parse(input)?;
178            (input, Value::Array(values))
179        } else if matches!(op, Operator::Between | Operator::NotBetween) {
180            let (input, min_val) = parse_value(input)?;
181            let (input, _) = multispace1(input)?;
182            let (input, _) = tag_no_case("and").parse(input)?;
183            let (input, _) = multispace1(input)?;
184            let (input, max_val) = parse_value(input)?;
185            // Store as array with 2 elements [min, max]
186            (input, Value::Array(vec![min_val, max_val]))
187        } else {
188            // Try parsing as expression first (for now() - 24h type syntax)
189            parse_filter_value(input)?
190        };
191
192        conditions.push(Condition {
193            left: Expr::Named(col.to_string()),
194            op,
195            value,
196            is_array_unnest: false,
197        });
198
199        current_input = input;
200
201        let and_result: IResult<&str, _> = preceded(
202            (multispace0, tag_no_case("and"), multispace1),
203            peek(parse_identifier),
204        )
205        .parse(current_input);
206
207        if let Ok((_next_input, _)) = and_result {
208            // Skip the AND keyword and trailing whitespace
209            let (next_input, _) = multispace0(current_input)?;
210            let (next_input, _) = tag_no_case("and").parse(next_input)?;
211            let (next_input, _) = multispace1(next_input)?;
212            current_input = next_input;
213        } else {
214            break;
215        }
216    }
217
218    Ok((current_input, conditions))
219}
220
221/// Parse a value in FILTER condition that can be either a simple value or an expression
222/// like `now() - 24h`. Converts expressions to Value::Function with SQL representation.
223fn parse_filter_value(input: &str) -> IResult<&str, Value> {
224    // First try simple value (but NOT interval - we want full expression parsing for that)
225    // Try parsing just string, int, float, bool, null, params first
226    if let Ok((remaining, val)) = parse_value(input) {
227        // If it's an interval, we still want the proper SQL format
228        if let Value::Interval { amount, unit } = val {
229            return Ok((
230                remaining,
231                Value::Function(format!("INTERVAL '{} {}'", amount, unit)),
232            ));
233        }
234
235        // If it's a function or other complex value, we need expression parsing
236        if !matches!(val, Value::Function(_)) {
237            return Ok((remaining, val));
238        }
239    }
240
241    // Try parsing as a full expression (handles now() - 24h correctly)
242    // We parse as expression and then convert to SQL string representation
243    // Need to parse until we hit a boundary (AND, ))
244    let mut end_pos = 0;
245    let mut paren_depth = 0;
246
247    for (i, c) in input.char_indices() {
248        match c {
249            '(' => paren_depth += 1,
250            ')' => {
251                if paren_depth == 0 {
252                    end_pos = i;
253                    break;
254                }
255                paren_depth -= 1;
256            }
257            _ => {}
258        }
259
260        if paren_depth == 0 && i > 0 {
261            let remaining = &input[i..];
262            if remaining.len() >= 4 {
263                let potential_and = &remaining[..4].to_lowercase();
264                if potential_and.starts_with("and ")
265                    || potential_and.starts_with("and\t")
266                    || potential_and.starts_with("and\n")
267                {
268                    end_pos = i;
269                    break;
270                }
271            }
272        }
273    }
274
275    if end_pos == 0 {
276        end_pos = input.len();
277    }
278
279    let expr_str = input[..end_pos].trim();
280    if expr_str.is_empty() {
281        return Err(nom::Err::Error(nom::error::Error::new(
282            input,
283            nom::error::ErrorKind::TakeWhile1,
284        )));
285    }
286
287    // Try to parse as expression and get proper SQL
288    if let Ok((_, expr)) = parse_expression(expr_str) {
289        return Ok((&input[end_pos..], Value::Function(expr.to_string())));
290    }
291
292    Ok((&input[end_pos..], Value::Function(expr_str.to_string())))
293}
294
295/// Parse PARTITION BY col1, col2 clause for window functions
296fn parse_partition_by(input: &str) -> IResult<&str, Vec<String>> {
297    let (input, _) = tag_no_case("partition").parse(input)?;
298    let (input, _) = multispace1(input)?;
299    let (input, _) = tag_no_case("by").parse(input)?;
300    let (input, _) = multispace1(input)?;
301
302    let (input, cols) =
303        separated_list0((multispace0, char(','), multispace0), parse_identifier).parse(input)?;
304
305    Ok((input, cols.into_iter().map(|s| s.to_string()).collect()))
306}
307
308/// Parse ORDER BY col1 [asc|desc], col2 clause for window functions
309fn parse_window_order_by(input: &str) -> IResult<&str, Vec<Cage>> {
310    let (input, _) = tag_no_case("order").parse(input)?;
311    let (input, _) = multispace1(input)?;
312    let (input, _) = tag_no_case("by").parse(input)?;
313    let (input, _) = multispace1(input)?;
314
315    let (input, order_parts) = separated_list0(
316        (multispace0, char(','), multispace0),
317        parse_window_sort_item,
318    )
319    .parse(input)?;
320
321    Ok((input, order_parts))
322}
323
324/// Parse a single order by item: col [asc|desc]
325fn parse_window_sort_item(input: &str) -> IResult<&str, Cage> {
326    use nom::combinator::value;
327
328    let (input, col) = parse_identifier(input)?;
329    let (input, _) = multispace0(input)?;
330
331    let (input, order) = opt(alt((
332        value(SortOrder::Desc, tag_no_case("desc")),
333        value(SortOrder::Asc, tag_no_case("asc")),
334    )))
335    .parse(input)?;
336
337    Ok((
338        input,
339        Cage {
340            kind: CageKind::Sort(order.unwrap_or(SortOrder::Asc)),
341            conditions: vec![Condition {
342                left: Expr::Named(col.to_string()),
343                op: Operator::Eq,
344                value: Value::Null,
345                is_array_unnest: false,
346            }],
347            logical_op: LogicalOp::And,
348        },
349    ))
350}
351
352/// Parse window frame: ROWS/RANGE BETWEEN start AND end
353fn parse_window_frame(input: &str) -> IResult<&str, WindowFrame> {
354    use nom::combinator::value;
355    
356    // Parse ROWS or RANGE
357    let (input, is_rows) = alt((
358        value(true, tag_no_case("rows")),
359        value(false, tag_no_case("range")),
360    )).parse(input)?;
361    let (input, _) = multispace1(input)?;
362    let (input, _) = tag_no_case("between").parse(input)?;
363    let (input, _) = multispace1(input)?;
364    
365    // Parse start bound
366    let (input, start) = parse_frame_bound(input)?;
367    let (input, _) = multispace1(input)?;
368    let (input, _) = tag_no_case("and").parse(input)?;
369    let (input, _) = multispace1(input)?;
370    
371    // Parse end bound
372    let (input, end) = parse_frame_bound(input)?;
373    
374    let frame = if is_rows {
375        WindowFrame::Rows { start, end }
376    } else {
377        WindowFrame::Range { start, end }
378    };
379    
380    Ok((input, frame))
381}
382
383/// Parse frame bound: UNBOUNDED PRECEDING, N PRECEDING, CURRENT ROW, N FOLLOWING, UNBOUNDED FOLLOWING
384fn parse_frame_bound(input: &str) -> IResult<&str, FrameBound> {
385    use nom::combinator::value;
386    use nom::character::complete::i32 as parse_i32;
387    
388    alt((
389        // UNBOUNDED PRECEDING
390        value(FrameBound::UnboundedPreceding, 
391            (tag_no_case("unbounded"), multispace1, tag_no_case("preceding"))),
392        // UNBOUNDED FOLLOWING
393        value(FrameBound::UnboundedFollowing,
394            (tag_no_case("unbounded"), multispace1, tag_no_case("following"))),
395        // CURRENT ROW
396        value(FrameBound::CurrentRow,
397            (tag_no_case("current"), multispace1, tag_no_case("row"))),
398        // N PRECEDING
399        map((parse_i32, multispace1, tag_no_case("preceding")),
400            |(n, _, _)| FrameBound::Preceding(n)),
401        // N FOLLOWING
402        map((parse_i32, multispace1, tag_no_case("following")),
403            |(n, _, _)| FrameBound::Following(n)),
404    )).parse(input)
405}