qail_core/parser/grammar/
functions.rs

1//! Function and aggregate parsing.
2//!
3//! Handles parsing of:
4//! - Function calls: name(arg1, arg2)
5//! - Aggregates: COUNT, SUM, AVG, MIN, MAX
6//! - FILTER (WHERE ...) clause for aggregates
7//! - COUNT(DISTINCT col) syntax
8
9use super::base::{parse_identifier, parse_operator, parse_value};
10use super::expressions::parse_expression;
11use crate::ast::*;
12use nom::{
13    IResult, Parser,
14    branch::alt,
15    bytes::complete::{tag, tag_no_case},
16    character::complete::{char, multispace0, multispace1},
17    combinator::{map, opt, peek},
18    multi::separated_list0,
19    sequence::preceded,
20};
21
22/// Parse function call or aggregate: name(arg1, arg2)
23pub fn parse_function_or_aggregate(input: &str) -> IResult<&str, Expr> {
24    // Identifier followed by (
25    let (input, name) = parse_identifier(input)?;
26    let (input, _) = multispace0(input)?;
27    let (input, _) = char('(').parse(input)?;
28    let (input, _) = multispace0(input)?;
29
30    // Check for DISTINCT keyword (for COUNT(DISTINCT col), etc.)
31    let (input, has_distinct) = opt((tag_no_case("distinct"), multispace1)).parse(input)?;
32    let distinct = has_distinct.is_some();
33
34    // Parse arguments as full expressions (supports nesting)
35    let (input, args) =
36        separated_list0((multispace0, char(','), multispace0), parse_function_arg).parse(input)?;
37
38    let (input, _) = multispace0(input)?;
39    let (input, _) = char(')').parse(input)?;
40    let (input, _) = multispace0(input)?;
41
42    // Check for FILTER (WHERE ...) clause - PostgreSQL aggregate extension
43    let (input, filter_clause) = opt(parse_filter_clause).parse(input)?;
44
45    // Check for OVER clause (window function)
46    let (input, _) = multispace0(input)?;
47    if let Ok((remaining, _)) = tag_no_case::<_, _, nom::error::Error<&str>>("over").parse(input) {
48        let (remaining, _) = multispace0(remaining)?;
49        let (remaining, _) = char('(').parse(remaining)?;
50        let (remaining, _) = multispace0(remaining)?;
51
52        // Parse PARTITION BY clause (optional)
53        let (remaining, partition) = opt(parse_partition_by).parse(remaining)?;
54        let partition = partition.unwrap_or_default();
55        let (remaining, _) = multispace0(remaining)?;
56
57        // Parse ORDER BY clause (optional)
58        let (remaining, order) = opt(parse_window_order_by).parse(remaining)?;
59        let order = order.unwrap_or_default();
60        let (remaining, _) = multispace0(remaining)?;
61
62        // Close the OVER clause
63        let (remaining, _) = char(')').parse(remaining)?;
64        let (remaining, _) = multispace0(remaining)?;
65
66        // Optional alias for window function
67        let (remaining, alias) = opt(preceded(
68            (multispace0, tag_no_case("as"), multispace1),
69            parse_identifier,
70        ))
71        .parse(remaining)?;
72        let alias_str = alias
73            .map(|s| s.to_string())
74            .unwrap_or_else(|| name.to_string());
75
76        // Convert args to Values for Expr::Window
77        let params: Vec<Value> = args
78            .iter()
79            .map(|e| Value::Function(e.to_string()))
80            .collect();
81
82        return Ok((
83            remaining,
84            Expr::Window {
85                name: alias_str,
86                func: name.to_string(),
87                params,
88                partition,
89                order,
90                frame: None, // TODO: Add frame parsing if needed
91            },
92        ));
93    }
94
95    // Optional alias: AS alias_name or just alias_name (after space)
96    let (input, alias) = opt(preceded(
97        (multispace0, tag_no_case("as"), multispace1),
98        parse_identifier,
99    ))
100    .parse(input)?;
101    let alias = alias.map(|s| s.to_string());
102
103    let name_lower = name.to_lowercase();
104    match name_lower.as_str() {
105        "count" | "sum" | "avg" | "min" | "max" => {
106            // For aggregates, convert first arg to string representation
107            let col = args
108                .first()
109                .map(|e| e.to_string())
110                .unwrap_or_else(|| "*".to_string());
111            let func = match name_lower.as_str() {
112                "count" => AggregateFunc::Count,
113                "sum" => AggregateFunc::Sum,
114                "avg" => AggregateFunc::Avg,
115                "min" => AggregateFunc::Min,
116                "max" => AggregateFunc::Max,
117                _ => AggregateFunc::Count, // unreachable
118            };
119            Ok((
120                input,
121                Expr::Aggregate {
122                    col,
123                    func,
124                    distinct,
125                    filter: filter_clause,
126                    alias,
127                },
128            ))
129        }
130        _ => Ok((
131            input,
132            Expr::FunctionCall {
133                name: name.to_string(),
134                args,
135                alias,
136            },
137        )),
138    }
139}
140
141/// Parse a single function argument (supports expressions or star)
142pub fn parse_function_arg(input: &str) -> IResult<&str, Expr> {
143    alt((map(tag("*"), |_| Expr::Star), parse_expression)).parse(input)
144}
145
146/// Parse FILTER (WHERE condition) clause for aggregates
147fn parse_filter_clause(input: &str) -> IResult<&str, Vec<Condition>> {
148    let (input, _) = tag_no_case("filter").parse(input)?;
149    let (input, _) = multispace0(input)?;
150    let (input, _) = char('(').parse(input)?;
151    let (input, _) = multispace0(input)?;
152    let (input, _) = tag_no_case("where").parse(input)?;
153    let (input, _) = multispace1(input)?;
154
155    // Parse conditions (simple version - single or AND-joined conditions)
156    let (input, conditions) = parse_filter_conditions(input)?;
157
158    let (input, _) = multispace0(input)?;
159    let (input, _) = char(')').parse(input)?;
160
161    Ok((input, conditions))
162}
163
164/// Parse conditions inside FILTER clause
165fn parse_filter_conditions(input: &str) -> IResult<&str, Vec<Condition>> {
166    let mut conditions = Vec::new();
167    let mut current_input = input;
168
169    loop {
170        // Parse: column op value/expression
171        let (input, _) = multispace0(current_input)?;
172        let (input, col) = parse_identifier(input)?;
173        let (input, _) = multispace0(input)?;
174        let (input, op) = parse_operator(input)?;
175        let (input, _) = multispace0(input)?;
176
177        // For IS NULL / IS NOT NULL, no value needed
178        let (input, value) = if matches!(op, Operator::IsNull | Operator::IsNotNull) {
179            (input, Value::Null)
180        } else if matches!(op, Operator::In | Operator::NotIn) {
181            // Parse IN ('val1', 'val2', ...)
182            let (input, _) = char('(').parse(input)?;
183            let (input, _) = multispace0(input)?;
184            let (input, values) =
185                separated_list0((multispace0, char(','), multispace0), parse_value).parse(input)?;
186            let (input, _) = multispace0(input)?;
187            let (input, _) = char(')').parse(input)?;
188            (input, Value::Array(values))
189        } else if matches!(op, Operator::Between | Operator::NotBetween) {
190            // Parse BETWEEN min AND max
191            let (input, min_val) = parse_value(input)?;
192            let (input, _) = multispace1(input)?;
193            let (input, _) = tag_no_case("and").parse(input)?;
194            let (input, _) = multispace1(input)?;
195            let (input, max_val) = parse_value(input)?;
196            // Store as array with 2 elements [min, max]
197            (input, Value::Array(vec![min_val, max_val]))
198        } else {
199            // Try parsing as expression first (for now() - 24h type syntax)
200            parse_filter_value(input)?
201        };
202
203        conditions.push(Condition {
204            left: Expr::Named(col.to_string()),
205            op,
206            value,
207            is_array_unnest: false,
208        });
209
210        current_input = input;
211
212        // Check for AND (use multispace0 since parse_filter_value may consume trailing space)
213        let and_result: IResult<&str, _> = preceded(
214            (multispace0, tag_no_case("and"), multispace1),
215            peek(parse_identifier),
216        )
217        .parse(current_input);
218
219        if let Ok((_next_input, _)) = and_result {
220            // Skip the AND keyword and trailing whitespace
221            let (next_input, _) = multispace0(current_input)?;
222            let (next_input, _) = tag_no_case("and").parse(next_input)?;
223            let (next_input, _) = multispace1(next_input)?;
224            current_input = next_input;
225        } else {
226            break;
227        }
228    }
229
230    Ok((current_input, conditions))
231}
232
233/// Parse a value in FILTER condition that can be either a simple value or an expression
234/// like `now() - 24h`. Converts expressions to Value::Function with SQL representation.
235fn parse_filter_value(input: &str) -> IResult<&str, Value> {
236    // First try simple value (but NOT interval - we want full expression parsing for that)
237    // Try parsing just string, int, float, bool, null, params first
238    if let Ok((remaining, val)) = parse_value(input) {
239        // If it's an interval, we still want the proper SQL format
240        if let Value::Interval { amount, unit } = val {
241            return Ok((
242                remaining,
243                Value::Function(format!("INTERVAL '{} {}'", amount, unit)),
244            ));
245        }
246
247        // If it's a function or other complex value, we need expression parsing
248        if !matches!(val, Value::Function(_)) {
249            return Ok((remaining, val));
250        }
251    }
252
253    // Try parsing as a full expression (handles now() - 24h correctly)
254    // We parse as expression and then convert to SQL string representation
255    // Need to parse until we hit a boundary (AND, ))
256    let mut end_pos = 0;
257    let mut paren_depth = 0;
258
259    for (i, c) in input.char_indices() {
260        match c {
261            '(' => paren_depth += 1,
262            ')' => {
263                if paren_depth == 0 {
264                    end_pos = i;
265                    break;
266                }
267                paren_depth -= 1;
268            }
269            _ => {}
270        }
271
272        // Check for AND keyword (case insensitive)
273        if paren_depth == 0 && i > 0 {
274            let remaining = &input[i..];
275            if remaining.len() >= 4 {
276                let potential_and = &remaining[..4].to_lowercase();
277                if potential_and.starts_with("and ")
278                    || potential_and.starts_with("and\t")
279                    || potential_and.starts_with("and\n")
280                {
281                    end_pos = i;
282                    break;
283                }
284            }
285        }
286    }
287
288    if end_pos == 0 {
289        end_pos = input.len();
290    }
291
292    let expr_str = input[..end_pos].trim();
293    if expr_str.is_empty() {
294        return Err(nom::Err::Error(nom::error::Error::new(
295            input,
296            nom::error::ErrorKind::TakeWhile1,
297        )));
298    }
299
300    // Try to parse as expression and get proper SQL
301    if let Ok((_, expr)) = parse_expression(expr_str) {
302        // Convert expression to SQL string (this handles INTERVAL properly)
303        return Ok((&input[end_pos..], Value::Function(expr.to_string())));
304    }
305
306    // Fallback: return as-is (shouldn't happen often)
307    Ok((&input[end_pos..], Value::Function(expr_str.to_string())))
308}
309
310/// Parse PARTITION BY col1, col2 clause for window functions
311fn parse_partition_by(input: &str) -> IResult<&str, Vec<String>> {
312    let (input, _) = tag_no_case("partition").parse(input)?;
313    let (input, _) = multispace1(input)?;
314    let (input, _) = tag_no_case("by").parse(input)?;
315    let (input, _) = multispace1(input)?;
316
317    let (input, cols) =
318        separated_list0((multispace0, char(','), multispace0), parse_identifier).parse(input)?;
319
320    Ok((input, cols.into_iter().map(|s| s.to_string()).collect()))
321}
322
323/// Parse ORDER BY col1 [asc|desc], col2 clause for window functions
324fn parse_window_order_by(input: &str) -> IResult<&str, Vec<Cage>> {
325    let (input, _) = tag_no_case("order").parse(input)?;
326    let (input, _) = multispace1(input)?;
327    let (input, _) = tag_no_case("by").parse(input)?;
328    let (input, _) = multispace1(input)?;
329
330    let (input, order_parts) = separated_list0(
331        (multispace0, char(','), multispace0),
332        parse_window_sort_item,
333    )
334    .parse(input)?;
335
336    Ok((input, order_parts))
337}
338
339/// Parse a single order by item: col [asc|desc]
340fn parse_window_sort_item(input: &str) -> IResult<&str, Cage> {
341    use nom::combinator::value;
342
343    let (input, col) = parse_identifier(input)?;
344    let (input, _) = multispace0(input)?;
345
346    let (input, order) = opt(alt((
347        value(SortOrder::Desc, tag_no_case("desc")),
348        value(SortOrder::Asc, tag_no_case("asc")),
349    )))
350    .parse(input)?;
351
352    Ok((
353        input,
354        Cage {
355            kind: CageKind::Sort(order.unwrap_or(SortOrder::Asc)),
356            conditions: vec![Condition {
357                left: Expr::Named(col.to_string()),
358                op: Operator::Eq,
359                value: Value::Null,
360                is_array_unnest: false,
361            }],
362            logical_op: LogicalOp::And,
363        },
364    ))
365}