Skip to main content

fraiseql_core/runtime/
aggregate_parser.rs

1//! Aggregate Query Parser
2//!
3//! Parses GraphQL aggregate queries into `AggregationRequest` for execution.
4//!
5//! # GraphQL Query Format
6//!
7//! ```graphql
8//! query {
9//!   sales_aggregate(
10//!     where: { customer_id: { _eq: "uuid-123" } }
11//!     groupBy: { category: true, occurred_at_day: true }
12//!     having: { revenue_sum_gt: 1000 }
13//!     orderBy: { revenue_sum: DESC }
14//!     limit: 10
15//!   ) {
16//!     category
17//!     occurred_at_day
18//!     count
19//!     revenue_sum
20//!     revenue_avg
21//!   }
22//! }
23//! ```
24//!
25//! # Parsed Result
26//!
27//! ```text
28//! // Illustrative output structure only — not executable code.
29//! AggregationRequest {
30//!     table_name: "tf_sales",
31//!     where_clause: Some(...),
32//!     group_by: vec![
33//!         GroupBySelection::Dimension { path: "category", alias: "category" },
34//!         GroupBySelection::TemporalBucket { column: "occurred_at", bucket: Day, alias: "occurred_at_day" },
35//!     ],
36//!     aggregates: vec![
37//!         AggregateSelection::Count { alias: "count" },
38//!         AggregateSelection::MeasureAggregate { measure: "revenue", function: Sum, alias: "revenue_sum" },
39//!         AggregateSelection::MeasureAggregate { measure: "revenue", function: Avg, alias: "revenue_avg" },
40//!     ],
41//!     having: vec![...],
42//!     order_by: vec![...],
43//!     limit: Some(10),
44//!     offset: None,
45//! }
46//! ```
47
48use serde_json::Value;
49
50use crate::{
51    compiler::{
52        aggregate_types::{AggregateFunction, HavingOperator, TemporalBucket},
53        aggregation::{
54            AggregateSelection, AggregationRequest, GroupBySelection, HavingCondition,
55            OrderByClause, OrderDirection,
56        },
57        fact_table::FactTableMetadata,
58    },
59    db::where_clause::{WhereClause, WhereOperator},
60    error::{FraiseQLError, Result},
61};
62
/// Aggregate query parser.
///
/// Stateless namespace type: never instantiated, all functionality lives in
/// associated functions (see [`AggregateQueryParser::parse`]).
pub struct AggregateQueryParser;
65
66impl AggregateQueryParser {
    /// Parse a simplified aggregate query into `AggregationRequest`.
    ///
    /// For now, we'll accept a JSON structure that represents the query:
    /// ```json
    /// {
    ///   "table": "tf_sales",
    ///   "groupBy": {
    ///     "category": true,
    ///     "occurred_at_day": true
    ///   },
    ///   "aggregates": [
    ///     {"count": {}},
    ///     {"revenue_sum": {}}
    ///   ],
    ///   "having": {
    ///     "revenue_sum_gt": 1000
    ///   },
    ///   "orderBy": {
    ///     "revenue_sum": "DESC"
    ///   },
    ///   "limit": 10
    /// }
    /// ```
    ///
    /// Only `"table"` is required; every other section is optional and defaults
    /// to "absent" (no WHERE clause, empty GROUP BY / aggregates / HAVING /
    /// ORDER BY, no LIMIT/OFFSET).
    ///
    /// # Errors
    ///
    /// Returns error if the query structure is invalid or references non-existent measures.
    ///
    /// The `native_columns` map (column name → PostgreSQL cast string) controls whether
    /// WHERE filters and GROUP BY fields are emitted as direct column references
    /// (`WhereClause::NativeField`, `GroupBySelection::NativeDimension`) rather than
    /// JSONB extraction variants.  Pass an empty map to get the old behaviour.
    pub fn parse(
        query_json: &Value,
        metadata: &FactTableMetadata,
        native_columns: &std::collections::HashMap<String, String>,
    ) -> Result<AggregationRequest> {
        // Extract table name — the only mandatory field of the query object.
        let table_name = query_json
            .get("table")
            .and_then(|v| v.as_str())
            .ok_or_else(|| FraiseQLError::Validation {
                message: "Missing 'table' field in aggregate query".to_string(),
                path:    None,
            })?
            .to_string();

        // Parse WHERE clause (if present)
        let where_clause = if let Some(where_obj) = query_json.get("where") {
            Some(Self::parse_where_clause(where_obj, native_columns)?)
        } else {
            None
        };

        // Parse GROUP BY selections
        let group_by = if let Some(group_by_obj) = query_json.get("groupBy") {
            Self::parse_group_by(group_by_obj, metadata, native_columns)?
        } else {
            vec![]
        };

        // Parse aggregate selections from requested fields
        let aggregates = if let Some(agg_array) = query_json.get("aggregates") {
            Self::parse_aggregates(agg_array, metadata)?
        } else {
            vec![]
        };

        // Parse HAVING conditions (validated against the aggregates selected above,
        // so this must run after aggregate parsing)
        let having = if let Some(having_obj) = query_json.get("having") {
            Self::parse_having(having_obj, &aggregates, metadata)?
        } else {
            vec![]
        };

        // Parse ORDER BY clauses
        let order_by = if let Some(order_obj) = query_json.get("orderBy") {
            Self::parse_order_by(order_obj)?
        } else {
            vec![]
        };

        // Parse LIMIT — JSON numbers above u32::MAX saturate rather than error.
        let limit = query_json
            .get("limit")
            .and_then(|v| v.as_u64())
            .map(|n| u32::try_from(n).unwrap_or(u32::MAX));

        // Parse OFFSET — same saturating conversion as LIMIT.
        let offset = query_json
            .get("offset")
            .and_then(|v| v.as_u64())
            .map(|n| u32::try_from(n).unwrap_or(u32::MAX));

        Ok(AggregationRequest {
            table_name,
            where_clause,
            group_by,
            aggregates,
            having,
            order_by,
            limit,
            offset,
        })
    }
172
173    /// Parse WHERE clause from JSON.
174    ///
175    /// For aggregate queries, WHERE works on denormalized filter columns only.
176    /// Expected format: `{ "field_operator": value }`
177    /// Example: `{ "customer_id_eq": "123", "occurred_at_gte": "2024-01-01" }`
178    ///
179    /// When a field name matches an entry in `native_columns`, the clause is emitted
180    /// as [`WhereClause::NativeField`] (direct column reference) rather than
181    /// [`WhereClause::Field`] (JSONB extraction).
182    fn parse_where_clause(
183        where_obj: &Value,
184        native_columns: &std::collections::HashMap<String, String>,
185    ) -> Result<WhereClause> {
186        let Some(obj) = where_obj.as_object() else {
187            return Ok(WhereClause::And(vec![]));
188        };
189
190        let mut conditions = Vec::new();
191
192        for (key, value) in obj {
193            // Parse field_operator format (e.g., "customer_id_eq" -> field="customer_id",
194            // operator="eq")
195            if let Some((field, operator_str)) = Self::parse_where_field_and_operator(key)? {
196                let operator = WhereOperator::from_str(operator_str)?;
197
198                let clause = if let Some(pg_cast) = native_columns.get(field) {
199                    WhereClause::NativeField {
200                        column: field.to_string(),
201                        pg_cast: pg_cast.clone(),
202                        operator,
203                        value: value.clone(),
204                    }
205                } else {
206                    WhereClause::Field {
207                        path: vec![field.to_string()],
208                        operator,
209                        value: value.clone(),
210                    }
211                };
212                conditions.push(clause);
213            }
214        }
215
216        Ok(WhereClause::And(conditions))
217    }
218
219    /// Parse WHERE field and operator from key (e.g., "`customer_id_eq`" -> ("`customer_id`",
220    /// "eq"))
221    fn parse_where_field_and_operator(key: &str) -> Result<Option<(&str, &str)>> {
222        // Find last underscore to split field from operator
223        if let Some(last_underscore) = key.rfind('_') {
224            let field = &key[..last_underscore];
225            let operator = &key[last_underscore + 1..];
226
227            // Validate operator is known
228            match WhereOperator::from_str(operator) {
229                Ok(_) => Ok(Some((field, operator))),
230                Err(_) => {
231                    // Not a valid operator suffix, treat entire key as field (might be used
232                    // elsewhere)
233                    Ok(None)
234                },
235            }
236        } else {
237            // No underscore, not a WHERE condition
238            Ok(None)
239        }
240    }
241
    /// Parse GROUP BY selections.
    ///
    /// Supports three formats:
    /// 1. Boolean true: `{"category": true}` -> regular dimension
    /// 2. Boolean true with suffix: `{"occurred_at_day": true}` -> temporal bucket
    /// 3. String bucket name: `{"occurred_at": "day"}` -> temporal bucket
    ///
    /// Entries whose value is neither `true` nor a string are silently ignored.
    ///
    /// When a field name matches an entry in `native_columns`, the selection is emitted
    /// as [`GroupBySelection::NativeDimension`] rather than [`GroupBySelection::Dimension`].
    fn parse_group_by(
        group_by_obj: &Value,
        metadata: &FactTableMetadata,
        native_columns: &std::collections::HashMap<String, String>,
    ) -> Result<Vec<GroupBySelection>> {
        let mut selections = Vec::new();

        // Non-object groupBy yields no selections (no error).
        if let Some(obj) = group_by_obj.as_object() {
            for (key, value) in obj {
                if value.as_bool() == Some(true) {
                    // Format 1 & 2: Boolean true (with or without suffix)
                    // Priority 1: Try calendar dimension first (highest performance)
                    if let Some(calendar_sel) = Self::try_parse_calendar_bucket(key, metadata)? {
                        selections.push(calendar_sel);
                    } else if let Some(bucket_sel) = Self::parse_temporal_bucket(key, metadata)? {
                        // Priority 2: Fall back to DATE_TRUNC if no calendar dimension
                        selections.push(bucket_sel);
                    } else if let Some(pg_cast) = native_columns.get(key.as_str()) {
                        // Priority 3: Native SQL column — direct reference, not JSONB
                        selections.push(GroupBySelection::NativeDimension {
                            column:  key.clone(),
                            pg_cast: pg_cast.clone(),
                        });
                    } else {
                        // Priority 4: Regular JSONB dimension
                        selections.push(GroupBySelection::Dimension {
                            path:  key.clone(),
                            alias: key.clone(),
                        });
                    }
                } else if let Some(bucket_str) = value.as_str() {
                    // Format 3: String bucket name {"occurred_at": "day"}
                    let bucket = TemporalBucket::from_str(bucket_str)?;

                    // Priority 1: Try calendar dimension first
                    if let Some(calendar_sel) =
                        Self::try_find_calendar_bucket(key, bucket, metadata)
                    {
                        selections.push(calendar_sel);
                    } else {
                        // Priority 2: Fall back to DATE_TRUNC
                        // Verify this column exists in denormalized_filters
                        let column_exists =
                            metadata.denormalized_filters.iter().any(|f| f.name == *key);

                        if !column_exists {
                            return Err(FraiseQLError::Validation {
                                message: format!(
                                    "Temporal bucketing column '{}' not found in denormalized filters",
                                    key
                                ),
                                path:    None,
                            });
                        }

                        selections.push(GroupBySelection::TemporalBucket {
                            column: key.clone(),
                            bucket,
                            alias: key.clone(),
                        });
                    }
                }
            }
        }

        Ok(selections)
    }
318
319    /// Parse temporal bucket if the key matches pattern
320    fn parse_temporal_bucket(
321        key: &str,
322        metadata: &FactTableMetadata,
323    ) -> Result<Option<GroupBySelection>> {
324        // Check for temporal bucket patterns: column_day, column_week, etc.
325        for filter_col in &metadata.denormalized_filters {
326            for bucket in &[
327                ("_second", TemporalBucket::Second),
328                ("_minute", TemporalBucket::Minute),
329                ("_hour", TemporalBucket::Hour),
330                ("_day", TemporalBucket::Day),
331                ("_week", TemporalBucket::Week),
332                ("_month", TemporalBucket::Month),
333                ("_quarter", TemporalBucket::Quarter),
334                ("_year", TemporalBucket::Year),
335            ] {
336                let expected_key = format!("{}{}", filter_col.name, bucket.0);
337                if key == expected_key {
338                    return Ok(Some(GroupBySelection::TemporalBucket {
339                        column: filter_col.name.clone(),
340                        bucket: bucket.1,
341                        alias:  key.to_string(),
342                    }));
343                }
344            }
345        }
346
347        Ok(None)
348    }
349
350    /// Try to parse calendar dimension from key pattern (e.g., "`occurred_at_day`")
351    ///
352    /// Checks if the key matches a calendar dimension pattern and returns
353    /// a `CalendarDimension` selection if available, otherwise None.
354    fn try_parse_calendar_bucket(
355        key: &str,
356        metadata: &FactTableMetadata,
357    ) -> Result<Option<GroupBySelection>> {
358        for calendar_dim in &metadata.calendar_dimensions {
359            // Check all temporal bucket suffixes
360            for (suffix, bucket_type) in &[
361                ("_second", TemporalBucket::Second),
362                ("_minute", TemporalBucket::Minute),
363                ("_hour", TemporalBucket::Hour),
364                ("_day", TemporalBucket::Day),
365                ("_week", TemporalBucket::Week),
366                ("_month", TemporalBucket::Month),
367                ("_quarter", TemporalBucket::Quarter),
368                ("_year", TemporalBucket::Year),
369            ] {
370                let expected_key = format!("{}{}", calendar_dim.source_column, suffix);
371                if key == expected_key {
372                    // Find matching calendar bucket
373                    if let Some((gran, bucket)) =
374                        Self::find_calendar_bucket(calendar_dim, *bucket_type)
375                    {
376                        return Ok(Some(GroupBySelection::CalendarDimension {
377                            source_column:   calendar_dim.source_column.clone(),
378                            calendar_column: gran.column_name.clone(),
379                            json_key:        bucket.json_key.clone(),
380                            bucket:          bucket.bucket_type,
381                            alias:           key.to_string(),
382                        }));
383                    }
384                }
385            }
386        }
387        Ok(None)
388    }
389
390    /// Try to find calendar bucket for explicit temporal request
391    ///
392    /// Used when user provides explicit bucket like {"`occurred_at"`: "day"}
393    fn try_find_calendar_bucket(
394        column: &str,
395        bucket: TemporalBucket,
396        metadata: &FactTableMetadata,
397    ) -> Option<GroupBySelection> {
398        for calendar_dim in &metadata.calendar_dimensions {
399            if calendar_dim.source_column == column {
400                if let Some((gran, cal_bucket)) = Self::find_calendar_bucket(calendar_dim, bucket) {
401                    return Some(GroupBySelection::CalendarDimension {
402                        source_column:   calendar_dim.source_column.clone(),
403                        calendar_column: gran.column_name.clone(),
404                        json_key:        cal_bucket.json_key.clone(),
405                        bucket:          cal_bucket.bucket_type,
406                        alias:           column.to_string(),
407                    });
408                }
409            }
410        }
411        None
412    }
413
414    /// Find calendar bucket in available granularities
415    ///
416    /// Searches through calendar dimension granularities to find a matching bucket type.
417    /// Returns the granularity and bucket if found.
418    fn find_calendar_bucket(
419        calendar_dim: &crate::compiler::fact_table::CalendarDimension,
420        bucket: TemporalBucket,
421    ) -> Option<(
422        &crate::compiler::fact_table::CalendarGranularity,
423        &crate::compiler::fact_table::CalendarBucket,
424    )> {
425        for granularity in &calendar_dim.granularities {
426            for cal_bucket in &granularity.buckets {
427                if cal_bucket.bucket_type == bucket {
428                    return Some((granularity, cal_bucket));
429                }
430            }
431        }
432        None
433    }
434
435    /// Parse aggregate selections
436    fn parse_aggregates(
437        agg_array: &Value,
438        metadata: &FactTableMetadata,
439    ) -> Result<Vec<AggregateSelection>> {
440        let mut aggregates = Vec::new();
441
442        if let Some(arr) = agg_array.as_array() {
443            for item in arr {
444                if let Some(obj) = item.as_object() {
445                    // Each object should have one key (the aggregate name)
446                    for (agg_name, _value) in obj {
447                        aggregates.push(Self::parse_aggregate_selection(agg_name, metadata)?);
448                    }
449                }
450            }
451        }
452
453        Ok(aggregates)
454    }
455
456    /// Parse a single aggregate selection
457    fn parse_aggregate_selection(
458        agg_name: &str,
459        metadata: &FactTableMetadata,
460    ) -> Result<AggregateSelection> {
461        // Handle COUNT
462        if agg_name == "count" {
463            return Ok(AggregateSelection::Count {
464                alias: "count".to_string(),
465            });
466        }
467
468        // Handle COUNT_DISTINCT: supports both "count_distinct" (defaults to first dimension)
469        // and "field_count_distinct" pattern (e.g., "product_id_count_distinct")
470        if agg_name == "count_distinct" {
471            // Default to first dimension path, or "id" if none available
472            let default_field = Self::extract_dimension_paths(metadata)
473                .first()
474                .cloned()
475                .unwrap_or_else(|| "id".to_string());
476            return Ok(AggregateSelection::CountDistinct {
477                field: default_field,
478                alias: "count_distinct".to_string(),
479            });
480        }
481
482        // Handle field_count_distinct pattern (e.g., "customer_id_count_distinct")
483        if let Some(stripped) = agg_name.strip_suffix("_count_distinct") {
484            // Check if the stripped part matches a dimension path
485            let dimension_paths = Self::extract_dimension_paths(metadata);
486            if dimension_paths.iter().any(|p| p == stripped) {
487                return Ok(AggregateSelection::CountDistinct {
488                    field: stripped.to_string(),
489                    alias: agg_name.to_string(),
490                });
491            }
492            // Also allow count distinct on measures
493            if metadata.measures.iter().any(|m| m.name == stripped) {
494                return Ok(AggregateSelection::CountDistinct {
495                    field: stripped.to_string(),
496                    alias: agg_name.to_string(),
497                });
498            }
499            // If no match found, return error with helpful message
500            return Err(FraiseQLError::Validation {
501                message: format!(
502                    "COUNT DISTINCT field '{}' not found in dimensions or measures. Available: {:?}",
503                    stripped, dimension_paths
504                ),
505                path:    None,
506            });
507        }
508
509        // Handle boolean aggregates (BOOL_AND, BOOL_OR)
510        // e.g., "is_active_bool_and", "has_discount_bool_or"
511        for dimension_path in Self::extract_dimension_paths(metadata) {
512            if let Some(stripped) = agg_name.strip_suffix("_bool_and") {
513                if stripped == dimension_path {
514                    return Ok(AggregateSelection::BoolAggregate {
515                        field:    dimension_path,
516                        function: crate::compiler::aggregate_types::BoolAggregateFunction::And,
517                        alias:    agg_name.to_string(),
518                    });
519                }
520            }
521            if let Some(stripped) = agg_name.strip_suffix("_bool_or") {
522                if stripped == dimension_path {
523                    return Ok(AggregateSelection::BoolAggregate {
524                        field:    dimension_path,
525                        function: crate::compiler::aggregate_types::BoolAggregateFunction::Or,
526                        alias:    agg_name.to_string(),
527                    });
528                }
529            }
530        }
531
532        // Handle measure aggregates: revenue_sum, revenue_avg, etc.
533        for measure in &metadata.measures {
534            for func in &[
535                ("_sum", AggregateFunction::Sum),
536                ("_avg", AggregateFunction::Avg),
537                ("_min", AggregateFunction::Min),
538                ("_max", AggregateFunction::Max),
539                ("_stddev", AggregateFunction::Stddev),
540                ("_variance", AggregateFunction::Variance),
541                // Advanced aggregates
542                ("_array_agg", AggregateFunction::ArrayAgg),
543                ("_json_agg", AggregateFunction::JsonAgg),
544                ("_jsonb_agg", AggregateFunction::JsonbAgg),
545                ("_string_agg", AggregateFunction::StringAgg),
546            ] {
547                let expected_name = format!("{}{}", measure.name, func.0);
548                if agg_name == expected_name {
549                    return Ok(AggregateSelection::MeasureAggregate {
550                        measure:  measure.name.clone(),
551                        function: func.1,
552                        alias:    agg_name.to_string(),
553                    });
554                }
555            }
556        }
557
558        // Check for dimension-level advanced aggregates
559        // e.g., "product_id_array_agg", "product_name_string_agg"
560        for dimension_path in Self::extract_dimension_paths(metadata) {
561            for func in &[
562                ("_array_agg", AggregateFunction::ArrayAgg),
563                ("_json_agg", AggregateFunction::JsonAgg),
564                ("_jsonb_agg", AggregateFunction::JsonbAgg),
565                ("_string_agg", AggregateFunction::StringAgg),
566            ] {
567                let expected_name = format!("{}{}", dimension_path, func.0);
568                if agg_name == expected_name {
569                    // For dimension aggregates, store the path as the "measure"
570                    return Ok(AggregateSelection::MeasureAggregate {
571                        measure:  dimension_path,
572                        function: func.1,
573                        alias:    agg_name.to_string(),
574                    });
575                }
576            }
577        }
578
579        Err(FraiseQLError::Validation {
580            message: format!("Unknown aggregate selection: {agg_name}"),
581            path:    None,
582        })
583    }
584
585    /// Extract dimension paths from metadata for advanced aggregate parsing
586    fn extract_dimension_paths(metadata: &FactTableMetadata) -> Vec<String> {
587        let mut paths = Vec::new();
588
589        // Add dimension paths from JSONB column
590        for dim_path in &metadata.dimensions.paths {
591            paths.push(dim_path.name.clone());
592        }
593
594        // Add denormalized filter columns (these can also be aggregated)
595        for filter in &metadata.denormalized_filters {
596            paths.push(filter.name.clone());
597        }
598
599        paths
600    }
601
602    /// Parse HAVING conditions
603    fn parse_having(
604        having_obj: &Value,
605        aggregates: &[AggregateSelection],
606        _metadata: &FactTableMetadata,
607    ) -> Result<Vec<HavingCondition>> {
608        let mut conditions = Vec::new();
609
610        if let Some(obj) = having_obj.as_object() {
611            for (key, value) in obj {
612                // Parse condition: revenue_sum_gt: 1000
613                if let Some((agg_name, operator)) = Self::parse_having_key(key) {
614                    // Find the aggregate
615                    let aggregate = aggregates
616                        .iter()
617                        .find(|a| a.alias() == agg_name)
618                        .ok_or_else(|| FraiseQLError::Validation {
619                            message: format!(
620                                "HAVING condition references non-selected aggregate: {agg_name}"
621                            ),
622                            path:    None,
623                        })?
624                        .clone();
625
626                    conditions.push(HavingCondition {
627                        aggregate,
628                        operator,
629                        value: value.clone(),
630                    });
631                }
632            }
633        }
634
635        Ok(conditions)
636    }
637
638    /// Parse HAVING key to extract aggregate name and operator
639    fn parse_having_key(key: &str) -> Option<(&str, HavingOperator)> {
640        for (suffix, op) in &[
641            ("_gt", HavingOperator::Gt),
642            ("_gte", HavingOperator::Gte),
643            ("_lt", HavingOperator::Lt),
644            ("_lte", HavingOperator::Lte),
645            ("_eq", HavingOperator::Eq),
646            ("_neq", HavingOperator::Neq),
647        ] {
648            if let Some(agg_name) = key.strip_suffix(suffix) {
649                return Some((agg_name, *op));
650            }
651        }
652        None
653    }
654
655    /// Parse ORDER BY clauses
656    fn parse_order_by(order_obj: &Value) -> Result<Vec<OrderByClause>> {
657        let mut clauses = Vec::new();
658
659        if let Some(obj) = order_obj.as_object() {
660            for (field, value) in obj {
661                #[allow(clippy::match_same_arms)]
662                // Reason: explicit ASC arm documents accepted values; wildcard handles malformed
663                // input
664                let direction = match value.as_str() {
665                    Some("ASC" | "asc") => OrderDirection::Asc,
666                    Some("DESC" | "desc") => OrderDirection::Desc,
667                    _ => OrderDirection::Asc, // Default to ASC
668                };
669
670                clauses.push(OrderByClause::new(field.clone(), direction));
671            }
672        }
673
674        Ok(clauses)
675    }
676}
677
678#[cfg(test)]
679mod tests {
680    #![allow(clippy::unwrap_used)] // Reason: test code, panics are acceptable
681
682    use serde_json::json;
683
684    use super::*;
685    use crate::compiler::fact_table::{DimensionColumn, FilterColumn, MeasureColumn, SqlType};
686
    /// Build the fixture fact table shared by all tests: two measures
    /// (`revenue`, `quantity`), two JSONB dimension paths (`category`,
    /// `product`), one denormalized filter column (`occurred_at`), and no
    /// calendar dimensions.
    fn create_test_metadata() -> FactTableMetadata {
        use crate::compiler::fact_table::DimensionPath;

        FactTableMetadata {
            table_name:           "tf_sales".to_string(),
            measures:             vec![
                MeasureColumn {
                    name:     "revenue".to_string(),
                    sql_type: SqlType::Decimal,
                    nullable: false,
                },
                MeasureColumn {
                    name:     "quantity".to_string(),
                    sql_type: SqlType::Int,
                    nullable: false,
                },
            ],
            dimensions:           DimensionColumn {
                name:  "dimensions".to_string(),
                paths: vec![
                    DimensionPath {
                        name:      "category".to_string(),
                        json_path: "data->>'category'".to_string(),
                        data_type: "text".to_string(),
                    },
                    DimensionPath {
                        name:      "product".to_string(),
                        json_path: "data->>'product'".to_string(),
                        data_type: "text".to_string(),
                    },
                ],
            },
            // Indexed timestamp column — enables temporal bucketing tests.
            denormalized_filters: vec![FilterColumn {
                name:     "occurred_at".to_string(),
                sql_type: SqlType::Timestamp,
                indexed:  true,
            }],
            calendar_dimensions:  vec![],
        }
    }
727
    /// Minimal query — only `count`, no grouping/filtering — parses to a
    /// single Count aggregate with the table name carried through.
    #[test]
    fn test_parse_simple_count() {
        let metadata = create_test_metadata();
        let query = json!({
            "table": "tf_sales",
            "aggregates": [
                {"count": {}}
            ]
        });

        let request =
            AggregateQueryParser::parse(&query, &metadata, &std::collections::HashMap::new())
                .unwrap();

        assert_eq!(request.table_name, "tf_sales");
        assert_eq!(request.aggregates.len(), 1);
        assert_eq!(request.aggregates[0].alias(), "count");
    }
746
    /// `{"category": true}` grouping resolves to a plain JSONB Dimension
    /// selection (path and alias both equal the key).
    #[test]
    fn test_parse_group_by_dimension() {
        let metadata = create_test_metadata();
        let query = json!({
            "table": "tf_sales",
            "groupBy": {
                "category": true
            },
            "aggregates": [
                {"count": {}}
            ]
        });

        let request =
            AggregateQueryParser::parse(&query, &metadata, &std::collections::HashMap::new())
                .unwrap();

        assert_eq!(request.group_by.len(), 1);
        match &request.group_by[0] {
            GroupBySelection::Dimension { path, alias } => {
                assert_eq!(path, "category");
                assert_eq!(alias, "category");
            },
            _ => panic!("Expected Dimension selection"),
        }
    }
773
    /// `{"occurred_at_day": true}` matches the `occurred_at` filter column
    /// plus `_day` suffix, producing a TemporalBucket (DATE_TRUNC) selection
    /// since the fixture has no calendar dimensions.
    #[test]
    fn test_parse_temporal_bucket() {
        let metadata = create_test_metadata();
        let query = json!({
            "table": "tf_sales",
            "groupBy": {
                "occurred_at_day": true
            },
            "aggregates": [
                {"count": {}}
            ]
        });

        let request =
            AggregateQueryParser::parse(&query, &metadata, &std::collections::HashMap::new())
                .unwrap();

        assert_eq!(request.group_by.len(), 1);
        match &request.group_by[0] {
            GroupBySelection::TemporalBucket {
                column,
                bucket,
                alias,
            } => {
                assert_eq!(column, "occurred_at");
                assert_eq!(*bucket, TemporalBucket::Day);
                assert_eq!(alias, "occurred_at_day");
            },
            _ => panic!("Expected TemporalBucket selection"),
        }
    }
805
806    #[test]
807    fn test_parse_multiple_aggregates() {
808        let metadata = create_test_metadata();
809        let query = json!({
810            "table": "tf_sales",
811            "aggregates": [
812                {"count": {}},
813                {"revenue_sum": {}},
814                {"revenue_avg": {}},
815                {"quantity_max": {}}
816            ]
817        });
818
819        let request =
820            AggregateQueryParser::parse(&query, &metadata, &std::collections::HashMap::new())
821                .unwrap();
822
823        assert_eq!(request.aggregates.len(), 4);
824        assert_eq!(request.aggregates[0].alias(), "count");
825        assert_eq!(request.aggregates[1].alias(), "revenue_sum");
826        assert_eq!(request.aggregates[2].alias(), "revenue_avg");
827        assert_eq!(request.aggregates[3].alias(), "quantity_max");
828    }
829
830    #[test]
831    fn test_parse_having_condition() {
832        let metadata = create_test_metadata();
833        let query = json!({
834            "table": "tf_sales",
835            "aggregates": [
836                {"revenue_sum": {}}
837            ],
838            "having": {
839                "revenue_sum_gt": 1000
840            }
841        });
842
843        let request =
844            AggregateQueryParser::parse(&query, &metadata, &std::collections::HashMap::new())
845                .unwrap();
846
847        assert_eq!(request.having.len(), 1);
848        assert_eq!(request.having[0].operator, HavingOperator::Gt);
849        assert_eq!(request.having[0].value, json!(1000));
850    }
851
852    #[test]
853    fn test_parse_order_by() {
854        let metadata = create_test_metadata();
855        let query = json!({
856            "table": "tf_sales",
857            "aggregates": [
858                {"revenue_sum": {}}
859            ],
860            "orderBy": {
861                "revenue_sum": "DESC"
862            }
863        });
864
865        let request =
866            AggregateQueryParser::parse(&query, &metadata, &std::collections::HashMap::new())
867                .unwrap();
868
869        assert_eq!(request.order_by.len(), 1);
870        assert_eq!(request.order_by[0].field, "revenue_sum");
871        assert_eq!(request.order_by[0].direction, OrderDirection::Desc);
872    }
873
874    #[test]
875    fn test_parse_limit_offset() {
876        let metadata = create_test_metadata();
877        let query = json!({
878            "table": "tf_sales",
879            "aggregates": [
880                {"count": {}}
881            ],
882            "limit": 10,
883            "offset": 5
884        });
885
886        let request =
887            AggregateQueryParser::parse(&query, &metadata, &std::collections::HashMap::new())
888                .unwrap();
889
890        assert_eq!(request.limit, Some(10));
891        assert_eq!(request.offset, Some(5));
892    }
893
894    #[test]
895    fn test_parse_complex_query() {
896        let metadata = create_test_metadata();
897        let query = json!({
898            "table": "tf_sales",
899            "groupBy": {
900                "category": true,
901                "occurred_at_month": true
902            },
903            "aggregates": [
904                {"count": {}},
905                {"revenue_sum": {}},
906                {"revenue_avg": {}},
907                {"quantity_sum": {}}
908            ],
909            "having": {
910                "revenue_sum_gt": 1000,
911                "count_gte": 5
912            },
913            "orderBy": {
914                "revenue_sum": "DESC",
915                "count": "ASC"
916            },
917            "limit": 20
918        });
919
920        let request =
921            AggregateQueryParser::parse(&query, &metadata, &std::collections::HashMap::new())
922                .unwrap();
923
924        assert_eq!(request.table_name, "tf_sales");
925        assert_eq!(request.group_by.len(), 2);
926        assert_eq!(request.aggregates.len(), 4);
927        assert_eq!(request.having.len(), 2);
928        assert_eq!(request.order_by.len(), 2);
929        assert_eq!(request.limit, Some(20));
930    }
931
932    #[test]
933    fn test_parse_count_distinct_default() {
934        let metadata = create_test_metadata();
935        let query = json!({
936            "table": "tf_sales",
937            "aggregates": [
938                {"count_distinct": {}}
939            ]
940        });
941
942        let request =
943            AggregateQueryParser::parse(&query, &metadata, &std::collections::HashMap::new())
944                .unwrap();
945
946        assert_eq!(request.aggregates.len(), 1);
947        match &request.aggregates[0] {
948            AggregateSelection::CountDistinct { field, alias } => {
949                // Defaults to first dimension: "category"
950                assert_eq!(field, "category");
951                assert_eq!(alias, "count_distinct");
952            },
953            _ => panic!("Expected CountDistinct selection"),
954        }
955    }
956
957    #[test]
958    fn test_parse_count_distinct_with_field() {
959        let metadata = create_test_metadata();
960        let query = json!({
961            "table": "tf_sales",
962            "aggregates": [
963                {"product_count_distinct": {}}
964            ]
965        });
966
967        let request =
968            AggregateQueryParser::parse(&query, &metadata, &std::collections::HashMap::new())
969                .unwrap();
970
971        assert_eq!(request.aggregates.len(), 1);
972        match &request.aggregates[0] {
973            AggregateSelection::CountDistinct { field, alias } => {
974                assert_eq!(field, "product");
975                assert_eq!(alias, "product_count_distinct");
976            },
977            _ => panic!("Expected CountDistinct selection"),
978        }
979    }
980
981    #[test]
982    fn test_parse_count_distinct_on_measure() {
983        let metadata = create_test_metadata();
984        let query = json!({
985            "table": "tf_sales",
986            "aggregates": [
987                {"revenue_count_distinct": {}}
988            ]
989        });
990
991        let request =
992            AggregateQueryParser::parse(&query, &metadata, &std::collections::HashMap::new())
993                .unwrap();
994
995        assert_eq!(request.aggregates.len(), 1);
996        match &request.aggregates[0] {
997            AggregateSelection::CountDistinct { field, alias } => {
998                assert_eq!(field, "revenue");
999                assert_eq!(alias, "revenue_count_distinct");
1000            },
1001            _ => panic!("Expected CountDistinct selection"),
1002        }
1003    }
1004
1005    #[test]
1006    fn test_parse_count_distinct_invalid_field() {
1007        let metadata = create_test_metadata();
1008        let query = json!({
1009            "table": "tf_sales",
1010            "aggregates": [
1011                {"nonexistent_count_distinct": {}}
1012            ]
1013        });
1014
1015        let result =
1016            AggregateQueryParser::parse(&query, &metadata, &std::collections::HashMap::new());
1017
1018        let err = result.expect_err("expected Err for invalid count_distinct field");
1019        match err {
1020            FraiseQLError::Validation { message, .. } => {
1021                assert!(
1022                    message.contains("COUNT DISTINCT field 'nonexistent' not found"),
1023                    "unexpected message: {message}"
1024                );
1025            },
1026            other => panic!("expected Validation error, got: {other:?}"),
1027        }
1028    }
1029
1030    #[test]
1031    fn test_parse_multiple_count_distinct() {
1032        let metadata = create_test_metadata();
1033        let query = json!({
1034            "table": "tf_sales",
1035            "aggregates": [
1036                {"count": {}},
1037                {"category_count_distinct": {}},
1038                {"product_count_distinct": {}},
1039                {"revenue_sum": {}}
1040            ]
1041        });
1042
1043        let request =
1044            AggregateQueryParser::parse(&query, &metadata, &std::collections::HashMap::new())
1045                .unwrap();
1046
1047        assert_eq!(request.aggregates.len(), 4);
1048        assert_eq!(request.aggregates[0].alias(), "count");
1049        assert_eq!(request.aggregates[1].alias(), "category_count_distinct");
1050        assert_eq!(request.aggregates[2].alias(), "product_count_distinct");
1051        assert_eq!(request.aggregates[3].alias(), "revenue_sum");
1052    }
1053}