// fraiseql_core/compiler/fact_table.rs
1//! Fact Table Introspection Module
2//!
3//! This module provides functionality to detect and introspect fact tables following
4//! FraiseQL's analytics architecture:
5//!
6//! # Fact Table Pattern
7//!
8//! - **Table naming**: `tf_*` prefix (table fact)
9//! - **Measures**: SQL columns with numeric types (INT, BIGINT, DECIMAL, FLOAT) - for fast
10//!   aggregation
11//! - **Dimensions**: JSONB `data` column - for flexible GROUP BY
12//! - **Denormalized filters**: Indexed SQL columns (customer_id, occurred_at) - for fast WHERE
13//!
14//! # No Joins Principle
15//!
16//! FraiseQL does NOT support joins. All dimensional data must be denormalized into the
17//! `data` JSONB column at ETL time (managed by DBA/data team, not FraiseQL).
18//!
19//! # Example Fact Table
20//!
21//! ```sql
22//! CREATE TABLE tf_sales (
23//!     id BIGSERIAL PRIMARY KEY,
24//!     -- Measures (SQL columns for fast aggregation)
25//!     revenue DECIMAL(10,2) NOT NULL,
26//!     quantity INT NOT NULL,
27//!     cost DECIMAL(10,2) NOT NULL,
28//!     -- Dimensions (JSONB for flexible grouping)
29//!     data JSONB NOT NULL,
30//!     -- Denormalized filters (indexed for fast WHERE)
31//!     customer_id UUID NOT NULL,
32//!     product_id UUID NOT NULL,
33//!     occurred_at TIMESTAMPTZ NOT NULL,
34//!     created_at TIMESTAMPTZ DEFAULT NOW()
35//! );
36//! ```
37
38use async_trait::async_trait;
39use serde::{Deserialize, Serialize};
40
41use crate::error::{FraiseQLError, Result};
42
/// Database introspection trait for querying table metadata
///
/// Abstracts over the concrete database driver so fact-table detection can run
/// against any supported backend (see [`DatabaseType`] for the dialects).
/// Implementations must be `Send + Sync` because introspection runs in async
/// contexts that may move across threads.
#[async_trait]
pub trait DatabaseIntrospector: Send + Sync {
    /// List all fact tables in the database (tables starting with "tf_")
    ///
    /// Returns: Vec of table names matching the tf_* pattern
    async fn list_fact_tables(&self) -> Result<Vec<String>>;

    /// Query column information for a table
    ///
    /// Returns: Vec of (column_name, data_type, is_nullable)
    async fn get_columns(&self, table_name: &str) -> Result<Vec<(String, String, bool)>>;

    /// Query indexes for a table
    ///
    /// Returns: Vec of column names that have indexes
    async fn get_indexed_columns(&self, table_name: &str) -> Result<Vec<String>>;

    /// Get database type (for SQL type parsing and JSON path syntax)
    fn database_type(&self) -> DatabaseType;

    /// Get sample JSONB data from a column to extract dimension paths
    ///
    /// Returns: Sample JSON value from the column, or None if no data exists
    ///
    /// Default implementation returns None. Implementations should override
    /// to query the database for actual sample data.
    async fn get_sample_jsonb(
        &self,
        _table_name: &str,
        _column_name: &str,
    ) -> Result<Option<serde_json::Value>> {
        Ok(None)
    }
}
78
/// Database type enum for SQL type parsing
///
/// Selects dialect-specific behavior: SQL type-name parsing and the JSON path
/// syntax generated for dimension extraction.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DatabaseType {
    /// PostgreSQL database type
    PostgreSQL,
    /// MySQL database type
    MySQL,
    /// SQLite database type
    SQLite,
    /// SQL Server database type
    SQLServer,
}
91
/// Detects and introspects fact tables
///
/// Stateless namespace type: all functionality is exposed as associated
/// functions (e.g. `is_fact_table`, `introspect`, `validate`).
pub struct FactTableDetector;
94
/// Metadata about a fact table structure
///
/// Produced by [`FactTableDetector::introspect`]; consumed by the query
/// compiler to plan join-free aggregations.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FactTableMetadata {
    /// Table name (e.g., "tf_sales")
    pub table_name:           String,
    /// Measures (aggregatable numeric columns)
    pub measures:             Vec<MeasureColumn>,
    /// Dimension column (JSONB)
    pub dimensions:           DimensionColumn,
    /// Denormalized filter columns
    pub denormalized_filters: Vec<FilterColumn>,
    /// Calendar dimensions for optimized temporal aggregations
    pub calendar_dimensions:  Vec<CalendarDimension>,
}
109
/// A measure column (aggregatable numeric type)
///
/// Only numeric `SqlType`s are valid here; see `FactTableDetector::validate`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MeasureColumn {
    /// Column name (e.g., "revenue")
    pub name:     String,
    /// SQL data type (Int, BigInt, Decimal, or Float)
    pub sql_type: SqlType,
    /// Is nullable
    pub nullable: bool,
}
120
/// SQL data types
///
/// Normalized across dialects; anything not recognized by the per-database
/// parsers falls into [`SqlType::Other`] with the raw type name preserved.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SqlType {
    /// SMALLINT, INT, INTEGER
    Int,
    /// BIGINT
    BigInt,
    /// DECIMAL, NUMERIC
    Decimal,
    /// REAL, FLOAT, DOUBLE PRECISION
    Float,
    /// JSONB (PostgreSQL)
    Jsonb,
    /// JSON (MySQL, SQL Server)
    Json,
    /// TEXT, VARCHAR
    Text,
    /// UUID
    Uuid,
    /// TIMESTAMP, TIMESTAMPTZ
    Timestamp,
    /// DATE
    Date,
    /// BOOLEAN
    Boolean,
    /// Other types (raw type name as reported by the database)
    Other(String),
}
149
/// Dimension column (JSONB)
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DimensionColumn {
    /// Column name (default: "dimensions" for fact tables)
    // NOTE(review): the module-level docs show a column named `data`; confirm
    // which default name is canonical for the fact-table convention.
    pub name:  String,
    /// Detected dimension paths (optional, extracted from sample data)
    pub paths: Vec<DimensionPath>,
}
158
/// A dimension path within the JSONB column
///
/// Nested keys are flattened: the `name` uses underscores ("customer_region")
/// while `json_path` holds the dialect-specific SQL extraction expression.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DimensionPath {
    /// Path name (e.g., "category")
    pub name:      String,
    /// JSON path (e.g., "dimensions->>'category'" for PostgreSQL)
    pub json_path: String,
    /// Data type hint ("string", "integer", "float", "boolean", "array", "object")
    pub data_type: String,
}
169
/// Calendar dimension metadata (pre-computed temporal fields)
///
/// Calendar dimensions provide 10-20x performance improvements for temporal aggregations
/// by using pre-computed JSONB columns (date_info, month_info, etc.) instead of runtime
/// DATE_TRUNC operations.
///
/// # Multi-Column Pattern
///
/// - 7 JSONB columns: date_info, week_info, month_info, quarter_info, semester_info, year_info,
///   decade_info
/// - Each contains hierarchical temporal buckets (e.g., date_info has: date, week, month, quarter,
///   year)
/// - Pre-populated by user's ETL (FraiseQL reads, doesn't populate)
///
/// # Example
///
/// ```json
/// {
///   "date": "2024-03-15",
///   "week": 11,
///   "month": 3,
///   "quarter": 1,
///   "semester": 1,
///   "year": 2024
/// }
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CalendarDimension {
    /// Source timestamp column (e.g., "occurred_at")
    pub source_column: String,

    /// Available calendar granularity columns (one entry per `*_info` column)
    pub granularities: Vec<CalendarGranularity>,
}
204
/// Calendar granularity column with pre-computed fields
///
/// Maps one physical `*_info` JSONB column to the temporal buckets it carries.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CalendarGranularity {
    /// Column name (e.g., "date_info", "month_info")
    pub column_name: String,

    /// Temporal buckets available in this column
    pub buckets: Vec<CalendarBucket>,
}
214
/// Pre-computed temporal bucket in calendar JSONB
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CalendarBucket {
    /// JSON path key (e.g., "date", "month", "quarter")
    pub json_key: String,

    /// Corresponding TemporalBucket enum (shared with the aggregate compiler)
    pub bucket_type: crate::compiler::aggregate_types::TemporalBucket,

    /// Data type (e.g., "date", "integer")
    pub data_type: String,
}
227
/// A denormalized filter column
///
/// Scalar SQL columns (ids, timestamps, flags) used in WHERE clauses instead
/// of joins; `indexed` records whether the database reported an index on it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FilterColumn {
    /// Column name (e.g., "customer_id")
    pub name:     String,
    /// SQL data type
    pub sql_type: SqlType,
    /// Is indexed (for performance)
    pub indexed:  bool,
}
238
/// Aggregation strategy for fact tables
///
/// Determines how fact table data is updated and structured.
/// Defaults to [`AggregationStrategy::Incremental`] (via `#[default]`).
///
/// # Strategies
///
/// - **Incremental**: New records added (e.g., transaction logs)
/// - **AccumulatingSnapshot**: Records updated with new events (e.g., order milestones)
/// - **PeriodicSnapshot**: Complete snapshot at regular intervals (e.g., daily inventory)
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub enum AggregationStrategy {
    /// New records are appended (e.g., transaction logs, event streams)
    #[serde(rename = "incremental")]
    #[default]
    Incremental,

    /// Records are updated with new events (e.g., order status changes)
    #[serde(rename = "accumulating_snapshot")]
    AccumulatingSnapshot,

    /// Complete snapshots at regular intervals (e.g., daily inventory levels)
    #[serde(rename = "periodic_snapshot")]
    PeriodicSnapshot,
}
263
/// Explicit fact table schema declaration
///
/// Allows users to explicitly declare fact table metadata instead of relying on
/// auto-detection. Explicit declarations take precedence over auto-detected metadata.
///
/// # Example
///
/// ```json
/// {
///   "name": "tf_sales",
///   "measures": ["amount", "quantity", "discount"],
///   "dimensions": ["product_id", "region_id", "date_id"],
///   "primary_key": "id",
///   "metadata": {
///     "aggregation_strategy": "incremental",
///     "grain": ["date", "product", "region"]
///   }
/// }
/// ```
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FactTableDeclaration {
    /// Fact table name (e.g., "tf_sales")
    pub name: String,

    /// Measure column names (aggregatable numeric fields)
    pub measures: Vec<String>,

    /// Dimension column names or paths within JSONB
    pub dimensions: Vec<String>,

    /// Primary key column name
    pub primary_key: String,

    /// Optional metadata about the fact table (strategy, grain, SCD flag)
    pub metadata: Option<FactTableDeclarationMetadata>,
}
300
/// Metadata for explicitly declared fact tables
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FactTableDeclarationMetadata {
    /// Aggregation strategy (how data is updated; defaults to incremental)
    #[serde(default)]
    pub aggregation_strategy: AggregationStrategy,

    /// Grain of the fact table (combination of dimensions that makes a unique record)
    pub grain: Vec<String>,

    /// Column containing snapshot date (for periodic snapshots)
    pub snapshot_date_column: Option<String>,

    /// Whether this is a slowly changing dimension (defaults to false)
    #[serde(default)]
    pub is_slowly_changing_dimension: bool,
}
318
319impl FactTableDetector {
320    /// Detect if a table name follows the fact table pattern.
321    ///
322    /// Fact tables must follow the naming convention: `tf_<table_name>`
323    /// where the table name contains only lowercase letters, numbers, and underscores.
324    ///
325    /// # Arguments
326    ///
327    /// * `table_name` - Table name to check
328    ///
329    /// # Returns
330    ///
331    /// `true` if the table name starts with `tf_` and follows naming conventions,
332    /// `false` otherwise.
333    ///
334    /// # Notes
335    ///
336    /// - The check is strict: `tf_` is required as a prefix
337    /// - Table names like `TF_sales` (uppercase prefix) are NOT recognized as fact tables
338    /// - Empty strings and tables named just `tf_` without additional suffix are not valid
339    ///
340    /// # Examples
341    ///
342    /// ```
343    /// use fraiseql_core::compiler::fact_table::FactTableDetector;
344    ///
345    /// assert!(FactTableDetector::is_fact_table("tf_sales"));
346    /// assert!(FactTableDetector::is_fact_table("tf_events"));
347    /// assert!(FactTableDetector::is_fact_table("tf_page_views_daily"));
348    /// assert!(!FactTableDetector::is_fact_table("ta_sales_by_day"));
349    /// assert!(!FactTableDetector::is_fact_table("v_user"));
350    /// assert!(!FactTableDetector::is_fact_table("TF_sales")); // uppercase prefix not recognized
351    /// assert!(!FactTableDetector::is_fact_table("tf_")); // incomplete name
352    /// ```
353    pub fn is_fact_table(table_name: &str) -> bool {
354        // Must start with "tf_" and have at least one more character
355        table_name.len() > 3 && table_name.starts_with("tf_")
356    }
357
358    /// Introspect a fact table from the database
359    ///
360    /// Queries the database schema to extract:
361    /// - Measures (numeric columns)
362    /// - Dimensions (JSONB/JSON columns)
363    /// - Denormalized filters (indexed columns)
364    ///
365    /// # Arguments
366    ///
367    /// * `introspector` - Database introspection implementation
368    /// * `table_name` - Fact table name (must start with "tf_")
369    ///
370    /// # Errors
371    ///
372    /// Returns error if:
373    /// - Table is not a fact table (doesn't start with "tf_")
374    /// - Database query fails
375    /// - Table structure is invalid
376    ///
377    /// # Example
378    ///
379    /// ```rust,no_run
380    /// use fraiseql_core::compiler::fact_table::{FactTableDetector, DatabaseIntrospector};
381    ///
382    /// # async fn example(db: impl DatabaseIntrospector) -> Result<(), Box<dyn std::error::Error>> {
383    /// let metadata = FactTableDetector::introspect(&db, "tf_sales").await?;
384    /// println!("Found {} measures", metadata.measures.len());
385    /// # Ok(())
386    /// # }
387    /// ```
388    pub async fn introspect(
389        introspector: &impl DatabaseIntrospector,
390        table_name: &str,
391    ) -> Result<FactTableMetadata> {
392        // Validate table name follows fact table pattern
393        if !Self::is_fact_table(table_name) {
394            return Err(FraiseQLError::Validation {
395                message: format!(
396                    "Table '{}' is not a fact table (must start with 'tf_')",
397                    table_name
398                ),
399                path:    None,
400            });
401        }
402
403        // Query column information
404        let columns = introspector.get_columns(table_name).await?;
405        if columns.is_empty() {
406            return Err(FraiseQLError::Validation {
407                message: format!("Table '{}' not found or has no columns", table_name),
408                path:    None,
409            });
410        }
411
412        // Query indexed columns
413        let indexed_columns = introspector.get_indexed_columns(table_name).await?;
414        let indexed_set: std::collections::HashSet<String> = indexed_columns.into_iter().collect();
415
416        // Parse SQL types based on database
417        let db_type = introspector.database_type();
418
419        let mut measures = Vec::new();
420        let mut dimension_column: Option<DimensionColumn> = None;
421        let mut filters = Vec::new();
422
423        for (name, data_type, is_nullable) in &columns {
424            let sql_type = Self::parse_sql_type(data_type, db_type);
425
426            match sql_type {
427                SqlType::Jsonb | SqlType::Json => {
428                    // This is the dimension column - try to extract paths from sample data
429                    let paths = if let Ok(Some(sample)) =
430                        introspector.get_sample_jsonb(table_name, name).await
431                    {
432                        Self::extract_dimension_paths(&sample, name, db_type)
433                    } else {
434                        Vec::new()
435                    };
436                    dimension_column = Some(DimensionColumn {
437                        name: name.clone(),
438                        paths,
439                    });
440                },
441                SqlType::Int | SqlType::BigInt | SqlType::Decimal | SqlType::Float => {
442                    // Skip common non-measure columns
443                    if name != "id" && !name.ends_with("_id") {
444                        measures.push(MeasureColumn {
445                            name:     name.clone(),
446                            sql_type: sql_type.clone(),
447                            nullable: *is_nullable,
448                        });
449                    }
450
451                    // Check if it's a denormalized filter
452                    if name.ends_with("_id") && indexed_set.contains(name.as_str()) {
453                        filters.push(FilterColumn {
454                            name:     name.clone(),
455                            sql_type: sql_type.clone(),
456                            indexed:  true,
457                        });
458                    }
459                },
460                _ => {
461                    // Other types might be denormalized filters
462                    if name != "id"
463                        && name != "created_at"
464                        && name != "updated_at"
465                        && name != "occurred_at"
466                    {
467                        filters.push(FilterColumn {
468                            name: name.clone(),
469                            sql_type,
470                            indexed: indexed_set.contains(name.as_str()),
471                        });
472                    } else if (name == "occurred_at" || name == "created_at")
473                        && indexed_set.contains(name.as_str())
474                    {
475                        // Timestamp columns are important filters if indexed
476                        filters.push(FilterColumn {
477                            name: name.clone(),
478                            sql_type,
479                            indexed: true,
480                        });
481                    }
482                },
483            }
484        }
485
486        // Detect calendar dimensions
487        let calendar_dimensions = Self::detect_calendar_dimensions(&columns, &indexed_set)?;
488
489        let metadata = FactTableMetadata {
490            table_name: table_name.to_string(),
491            measures,
492            dimensions: dimension_column.unwrap_or(DimensionColumn {
493                name:  "dimensions".to_string(),
494                paths: Vec::new(),
495            }),
496            denormalized_filters: filters,
497            calendar_dimensions,
498        };
499
500        Self::validate(&metadata)?;
501        Ok(metadata)
502    }
503
504    /// Parse SQL type string to SqlType enum
505    fn parse_sql_type(type_name: &str, db_type: DatabaseType) -> SqlType {
506        match db_type {
507            DatabaseType::PostgreSQL => SqlType::from_str_postgres(type_name),
508            DatabaseType::MySQL => SqlType::from_str_mysql(type_name),
509            DatabaseType::SQLite => SqlType::from_str_sqlite(type_name),
510            DatabaseType::SQLServer => SqlType::from_str_sqlserver(type_name),
511        }
512    }
513
514    /// Validate fact table structure
515    ///
516    /// # Errors
517    ///
518    /// Returns error if:
519    /// - No measures found
520    /// - No dimension column found
521    /// - Measures are not numeric types
522    pub fn validate(metadata: &FactTableMetadata) -> Result<()> {
523        // Must have at least one measure
524        if metadata.measures.is_empty() {
525            return Err(FraiseQLError::Validation {
526                message: format!(
527                    "Fact table '{}' must have at least one measure column",
528                    metadata.table_name
529                ),
530                path:    None,
531            });
532        }
533
534        // Validate all measures are numeric
535        for measure in &metadata.measures {
536            if !Self::is_numeric_type(&measure.sql_type) {
537                return Err(FraiseQLError::Validation {
538                    message: format!(
539                        "Measure column '{}' must be numeric type, found {:?}",
540                        measure.name, measure.sql_type
541                    ),
542                    path:    None,
543                });
544            }
545        }
546
547        // Must have dimension column
548        if metadata.dimensions.name.is_empty() {
549            return Err(FraiseQLError::Validation {
550                message: format!(
551                    "Fact table '{}' must have a dimension column (JSONB)",
552                    metadata.table_name
553                ),
554                path:    None,
555            });
556        }
557
558        Ok(())
559    }
560
561    /// Check if SQL type is numeric (suitable for aggregation)
562    fn is_numeric_type(sql_type: &SqlType) -> bool {
563        matches!(sql_type, SqlType::Int | SqlType::BigInt | SqlType::Decimal | SqlType::Float)
564    }
565
566    /// Extract dimension paths from a sample JSON value
567    ///
568    /// Walks through the JSON structure and extracts top-level keys as dimension paths.
569    /// Nested objects are represented with dot notation (e.g., "customer.region").
570    ///
571    /// # Arguments
572    ///
573    /// * `sample` - Sample JSON value from the dimension column
574    /// * `column_name` - Name of the JSONB column (e.g., "dimensions")
575    /// * `db_type` - Database type for generating correct JSON path syntax
576    ///
577    /// # Returns
578    ///
579    /// Vec of `DimensionPath` extracted from the sample data
580    pub fn extract_dimension_paths(
581        sample: &serde_json::Value,
582        column_name: &str,
583        db_type: DatabaseType,
584    ) -> Vec<DimensionPath> {
585        let mut paths = Vec::new();
586        Self::extract_paths_recursive(sample, column_name, "", &mut paths, db_type, 0);
587        paths
588    }
589
590    /// Recursively extract paths from JSON structure
591    fn extract_paths_recursive(
592        value: &serde_json::Value,
593        column_name: &str,
594        prefix: &str,
595        paths: &mut Vec<DimensionPath>,
596        db_type: DatabaseType,
597        depth: usize,
598    ) {
599        // Limit depth to avoid infinite recursion on deeply nested structures
600        if depth > 3 {
601            return;
602        }
603
604        if let Some(obj) = value.as_object() {
605            for (key, val) in obj {
606                let full_path = if prefix.is_empty() {
607                    key.clone()
608                } else {
609                    format!("{}.{}", prefix, key)
610                };
611
612                // Determine data type from the value
613                let data_type = Self::infer_json_type(val);
614
615                // Generate database-specific JSON path syntax
616                let json_path = Self::generate_json_path(column_name, &full_path, db_type);
617
618                paths.push(DimensionPath {
619                    name: full_path.replace('.', "_"), /* Convert dots to underscores for field
620                                                        * names */
621                    json_path,
622                    data_type,
623                });
624
625                // Recurse into nested objects
626                if val.is_object() {
627                    Self::extract_paths_recursive(
628                        val,
629                        column_name,
630                        &full_path,
631                        paths,
632                        db_type,
633                        depth + 1,
634                    );
635                }
636            }
637        }
638    }
639
640    /// Infer JSON data type from a value
641    fn infer_json_type(value: &serde_json::Value) -> String {
642        match value {
643            serde_json::Value::Null => "string".to_string(),
644            serde_json::Value::Bool(_) => "boolean".to_string(),
645            serde_json::Value::Number(n) => {
646                if n.is_i64() || n.is_u64() {
647                    "integer".to_string()
648                } else {
649                    "float".to_string()
650                }
651            },
652            serde_json::Value::String(_) => "string".to_string(),
653            serde_json::Value::Array(_) => "array".to_string(),
654            serde_json::Value::Object(_) => "object".to_string(),
655        }
656    }
657
658    /// Generate database-specific JSON path syntax
659    fn generate_json_path(column_name: &str, path: &str, db_type: DatabaseType) -> String {
660        let parts: Vec<&str> = path.split('.').collect();
661
662        match db_type {
663            DatabaseType::PostgreSQL => {
664                // PostgreSQL: column->>'key' for top-level, column->'nested'->>'key' for nested
665                if parts.is_empty() {
666                    // Safety: handle empty path by returning raw column name
667                    column_name.to_string()
668                } else if parts.len() == 1 {
669                    format!("{}->>'{}'", column_name, parts[0])
670                } else {
671                    // Safe: parts.len() >= 2 is guaranteed here
672                    if let Some(last) = parts.last() {
673                        let rest = &parts[..parts.len() - 1];
674                        let nav = rest.iter().fold(String::new(), |mut acc, p| {
675                            use std::fmt::Write;
676                            let _ = write!(acc, "->'{}'", p);
677                            acc
678                        });
679                        format!("{}{}->>'{}'", column_name, nav, last)
680                    } else {
681                        // This branch is unreachable due to length check, but safe fallback
682                        column_name.to_string()
683                    }
684                }
685            },
686            DatabaseType::MySQL => {
687                // MySQL: JSON_EXTRACT(column, '$.path.to.key')
688                format!("JSON_UNQUOTE(JSON_EXTRACT({}, '$.{}')", column_name, path)
689            },
690            DatabaseType::SQLite => {
691                // SQLite: json_extract(column, '$.path.to.key')
692                format!("json_extract({}, '$.{}')", column_name, path)
693            },
694            DatabaseType::SQLServer => {
695                // SQL Server: JSON_VALUE(column, '$.path.to.key')
696                format!("JSON_VALUE({}, '$.{}')", column_name, path)
697            },
698        }
699    }
700
701    /// Detect calendar dimension columns (date_info, week_info, etc.)
702    ///
703    /// Looks for `*_info` JSONB/JSON columns following the calendar dimension pattern.
704    /// Returns calendar dimension metadata if calendar columns are found.
705    ///
706    /// # Arguments
707    ///
708    /// * `columns` - List of (name, data_type, nullable) tuples
709    /// * `_indexed_set` - Set of indexed columns (unused, for future optimization detection)
710    ///
711    /// # Returns
712    ///
713    /// Vec of calendar dimensions (empty if none found)
714    fn detect_calendar_dimensions(
715        columns: &[(String, String, bool)],
716        _indexed_set: &std::collections::HashSet<String>,
717    ) -> Result<Vec<CalendarDimension>> {
718        // Look for *_info columns with JSONB/JSON type
719        let calendar_columns: Vec<String> = columns
720            .iter()
721            .filter(|(name, data_type, _)| {
722                name.ends_with("_info")
723                    && (data_type.to_lowercase().contains("json")
724                        || data_type.to_lowercase().contains("jsonb"))
725            })
726            .map(|(name, _, _)| name.clone())
727            .collect();
728
729        if calendar_columns.is_empty() {
730            return Ok(Vec::new());
731        }
732
733        // Build granularities based on calendar dimension pattern
734        let mut granularities = Vec::new();
735        for col_name in calendar_columns {
736            let buckets = Self::infer_calendar_buckets(&col_name);
737            if !buckets.is_empty() {
738                granularities.push(CalendarGranularity {
739                    column_name: col_name,
740                    buckets,
741                });
742            }
743        }
744
745        if granularities.is_empty() {
746            return Ok(Vec::new());
747        }
748
749        // Assume single source column "occurred_at"
750        // (could be enhanced to detect from schema later)
751        Ok(vec![CalendarDimension {
752            source_column: "occurred_at".to_string(),
753            granularities,
754        }])
755    }
756
757    /// Map calendar column names to available buckets (standard pattern)
758    ///
759    /// # Arguments
760    ///
761    /// * `column_name` - Name of the calendar column (e.g., "date_info", "month_info")
762    ///
763    /// # Returns
764    ///
765    /// Vec of calendar buckets available in this column
766    fn infer_calendar_buckets(column_name: &str) -> Vec<CalendarBucket> {
767        use crate::compiler::aggregate_types::TemporalBucket;
768
769        match column_name {
770            "date_info" => vec![
771                CalendarBucket {
772                    json_key:    "date".to_string(),
773                    bucket_type: TemporalBucket::Day,
774                    data_type:   "date".to_string(),
775                },
776                CalendarBucket {
777                    json_key:    "week".to_string(),
778                    bucket_type: TemporalBucket::Week,
779                    data_type:   "integer".to_string(),
780                },
781                CalendarBucket {
782                    json_key:    "month".to_string(),
783                    bucket_type: TemporalBucket::Month,
784                    data_type:   "integer".to_string(),
785                },
786                CalendarBucket {
787                    json_key:    "quarter".to_string(),
788                    bucket_type: TemporalBucket::Quarter,
789                    data_type:   "integer".to_string(),
790                },
791                CalendarBucket {
792                    json_key:    "year".to_string(),
793                    bucket_type: TemporalBucket::Year,
794                    data_type:   "integer".to_string(),
795                },
796            ],
797            "week_info" => vec![
798                CalendarBucket {
799                    json_key:    "week".to_string(),
800                    bucket_type: TemporalBucket::Week,
801                    data_type:   "integer".to_string(),
802                },
803                CalendarBucket {
804                    json_key:    "month".to_string(),
805                    bucket_type: TemporalBucket::Month,
806                    data_type:   "integer".to_string(),
807                },
808                CalendarBucket {
809                    json_key:    "quarter".to_string(),
810                    bucket_type: TemporalBucket::Quarter,
811                    data_type:   "integer".to_string(),
812                },
813                CalendarBucket {
814                    json_key:    "year".to_string(),
815                    bucket_type: TemporalBucket::Year,
816                    data_type:   "integer".to_string(),
817                },
818            ],
819            "month_info" => vec![
820                CalendarBucket {
821                    json_key:    "month".to_string(),
822                    bucket_type: TemporalBucket::Month,
823                    data_type:   "integer".to_string(),
824                },
825                CalendarBucket {
826                    json_key:    "quarter".to_string(),
827                    bucket_type: TemporalBucket::Quarter,
828                    data_type:   "integer".to_string(),
829                },
830                CalendarBucket {
831                    json_key:    "year".to_string(),
832                    bucket_type: TemporalBucket::Year,
833                    data_type:   "integer".to_string(),
834                },
835            ],
836            "quarter_info" => vec![
837                CalendarBucket {
838                    json_key:    "quarter".to_string(),
839                    bucket_type: TemporalBucket::Quarter,
840                    data_type:   "integer".to_string(),
841                },
842                CalendarBucket {
843                    json_key:    "year".to_string(),
844                    bucket_type: TemporalBucket::Year,
845                    data_type:   "integer".to_string(),
846                },
847            ],
848            "semester_info" => vec![
849                CalendarBucket {
850                    json_key:    "semester".to_string(),
851                    bucket_type: TemporalBucket::Quarter, // Map to Quarter for now
852                    data_type:   "integer".to_string(),
853                },
854                CalendarBucket {
855                    json_key:    "year".to_string(),
856                    bucket_type: TemporalBucket::Year,
857                    data_type:   "integer".to_string(),
858                },
859            ],
860            "year_info" => vec![CalendarBucket {
861                json_key:    "year".to_string(),
862                bucket_type: TemporalBucket::Year,
863                data_type:   "integer".to_string(),
864            }],
865            _ => Vec::new(),
866        }
867    }
868
869    /// Create metadata from column definitions (for testing)
870    pub fn from_columns(
871        table_name: String,
872        columns: Vec<(&str, SqlType, bool)>,
873    ) -> Result<FactTableMetadata> {
874        let mut measures = Vec::new();
875        let mut dimension_column: Option<DimensionColumn> = None;
876        let mut filters = Vec::new();
877
878        for (name, sql_type, nullable) in columns {
879            match sql_type {
880                SqlType::Jsonb | SqlType::Json => {
881                    // This is the dimension column
882                    dimension_column = Some(DimensionColumn {
883                        name:  name.to_string(),
884                        paths: Vec::new(),
885                    });
886                },
887                SqlType::Int | SqlType::BigInt | SqlType::Decimal | SqlType::Float => {
888                    // Skip id column
889                    if name != "id" && !name.ends_with("_id") {
890                        // This is a measure
891                        measures.push(MeasureColumn {
892                            name: name.to_string(),
893                            sql_type,
894                            nullable,
895                        });
896                    } else if name != "id" {
897                        // This is a filter (_id columns)
898                        filters.push(FilterColumn {
899                            name: name.to_string(),
900                            sql_type,
901                            indexed: false,
902                        });
903                    }
904                },
905                _ => {
906                    // This might be a filter column (if not id/created_at/updated_at)
907                    if name != "id" && name != "created_at" && name != "updated_at" {
908                        filters.push(FilterColumn {
909                            name: name.to_string(),
910                            sql_type,
911                            indexed: false, // Would need to query indexes to determine
912                        });
913                    }
914                },
915            }
916        }
917
918        let metadata = FactTableMetadata {
919            table_name,
920            measures,
921            dimensions: dimension_column.unwrap_or(DimensionColumn {
922                name:  "dimensions".to_string(),
923                paths: Vec::new(),
924            }),
925            denormalized_filters: filters,
926            calendar_dimensions: Vec::new(), // No calendar detection in test helper
927        };
928
929        Self::validate(&metadata)?;
930        Ok(metadata)
931    }
932}
933
934impl SqlType {
935    /// Parse SQL type from string (database-specific)
936    pub fn from_str_postgres(type_name: &str) -> Self {
937        match type_name.to_lowercase().as_str() {
938            "smallint" | "int" | "integer" | "int2" | "int4" => Self::Int,
939            "bigint" | "int8" => Self::BigInt,
940            "decimal" | "numeric" => Self::Decimal,
941            "real" | "float" | "double precision" | "float4" | "float8" => Self::Float,
942            "jsonb" => Self::Jsonb,
943            "json" => Self::Json,
944            "text" | "varchar" | "character varying" | "char" | "character" => Self::Text,
945            "uuid" => Self::Uuid,
946            "timestamp"
947            | "timestamptz"
948            | "timestamp with time zone"
949            | "timestamp without time zone" => Self::Timestamp,
950            "date" => Self::Date,
951            "boolean" | "bool" => Self::Boolean,
952            other => Self::Other(other.to_string()),
953        }
954    }
955
956    /// Parse SQL type from string (MySQL)
957    pub fn from_str_mysql(type_name: &str) -> Self {
958        match type_name.to_lowercase().as_str() {
959            "tinyint" | "smallint" | "mediumint" | "int" | "integer" => Self::Int,
960            "bigint" => Self::BigInt,
961            "decimal" | "numeric" => Self::Decimal,
962            "float" | "double" | "real" => Self::Float,
963            "json" => Self::Json,
964            "text" | "varchar" | "char" | "tinytext" | "mediumtext" | "longtext" => Self::Text,
965            "timestamp" | "datetime" => Self::Timestamp,
966            "date" => Self::Date,
967            "boolean" | "bool" | "tinyint(1)" => Self::Boolean,
968            other => Self::Other(other.to_string()),
969        }
970    }
971
972    /// Parse SQL type from string (SQLite)
973    pub fn from_str_sqlite(type_name: &str) -> Self {
974        match type_name.to_lowercase().as_str() {
975            "integer" | "int" => Self::BigInt, // SQLite INTEGER is 64-bit
976            "real" | "double" | "float" => Self::Float,
977            "numeric" | "decimal" => Self::Decimal,
978            "text" | "varchar" | "char" => Self::Text,
979            "blob" => Self::Other("BLOB".to_string()),
980            other => Self::Other(other.to_string()),
981        }
982    }
983
984    /// Parse SQL type from string (SQL Server)
985    pub fn from_str_sqlserver(type_name: &str) -> Self {
986        match type_name.to_lowercase().as_str() {
987            "tinyint" | "smallint" | "int" => Self::Int,
988            "bigint" => Self::BigInt,
989            "decimal" | "numeric" | "money" | "smallmoney" => Self::Decimal,
990            "float" | "real" => Self::Float,
991            "nvarchar" | "varchar" | "char" | "nchar" | "text" | "ntext" => Self::Text,
992            "uniqueidentifier" => Self::Uuid,
993            "datetime" | "datetime2" | "smalldatetime" | "datetimeoffset" => Self::Timestamp,
994            "date" => Self::Date,
995            "bit" => Self::Boolean,
996            other => Self::Other(other.to_string()),
997        }
998    }
999}
1000
1001#[cfg(test)]
1002mod tests {
1003    use super::*;
1004
1005    #[test]
1006    fn test_is_fact_table() {
1007        assert!(FactTableDetector::is_fact_table("tf_sales"));
1008        assert!(FactTableDetector::is_fact_table("tf_events"));
1009        assert!(FactTableDetector::is_fact_table("tf_api_requests"));
1010        assert!(!FactTableDetector::is_fact_table("ta_sales_by_day"));
1011        assert!(!FactTableDetector::is_fact_table("td_products"));
1012        assert!(!FactTableDetector::is_fact_table("v_user"));
1013        assert!(!FactTableDetector::is_fact_table("tb_user"));
1014    }
1015
1016    #[test]
1017    fn test_validate_valid_fact_table() {
1018        let metadata = FactTableMetadata {
1019            table_name:           "tf_sales".to_string(),
1020            measures:             vec![MeasureColumn {
1021                name:     "revenue".to_string(),
1022                sql_type: SqlType::Decimal,
1023                nullable: false,
1024            }],
1025            dimensions:           DimensionColumn {
1026                name:  "dimensions".to_string(),
1027                paths: vec![],
1028            },
1029            denormalized_filters: vec![],
1030            calendar_dimensions:  vec![],
1031        };
1032
1033        assert!(FactTableDetector::validate(&metadata).is_ok());
1034    }
1035
1036    #[test]
1037    fn test_validate_missing_measures() {
1038        let metadata = FactTableMetadata {
1039            table_name:           "tf_sales".to_string(),
1040            measures:             vec![],
1041            dimensions:           DimensionColumn {
1042                name:  "dimensions".to_string(),
1043                paths: vec![],
1044            },
1045            denormalized_filters: vec![],
1046            calendar_dimensions:  vec![],
1047        };
1048
1049        let result = FactTableDetector::validate(&metadata);
1050        assert!(result.is_err());
1051        assert!(result.unwrap_err().to_string().contains("at least one measure"));
1052    }
1053
1054    #[test]
1055    fn test_validate_non_numeric_measure() {
1056        let metadata = FactTableMetadata {
1057            table_name:           "tf_sales".to_string(),
1058            measures:             vec![MeasureColumn {
1059                name:     "category".to_string(),
1060                sql_type: SqlType::Text, // Wrong type for measure!
1061                nullable: false,
1062            }],
1063            dimensions:           DimensionColumn {
1064                name:  "dimensions".to_string(),
1065                paths: vec![],
1066            },
1067            denormalized_filters: vec![],
1068            calendar_dimensions:  vec![],
1069        };
1070
1071        let result = FactTableDetector::validate(&metadata);
1072        assert!(result.is_err());
1073        assert!(result.unwrap_err().to_string().contains("must be numeric"));
1074    }
1075
1076    #[test]
1077    fn test_from_columns() {
1078        let columns = vec![
1079            ("id", SqlType::BigInt, false),
1080            ("revenue", SqlType::Decimal, false),
1081            ("quantity", SqlType::Int, false),
1082            ("dimensions", SqlType::Jsonb, false),
1083            ("customer_id", SqlType::Uuid, false),
1084            ("occurred_at", SqlType::Timestamp, false),
1085        ];
1086
1087        let metadata = FactTableDetector::from_columns("tf_sales".to_string(), columns).unwrap();
1088
1089        assert_eq!(metadata.measures.len(), 2);
1090        assert_eq!(metadata.measures[0].name, "revenue");
1091        assert_eq!(metadata.measures[1].name, "quantity");
1092        assert_eq!(metadata.dimensions.name, "dimensions");
1093        assert_eq!(metadata.denormalized_filters.len(), 2);
1094        assert_eq!(metadata.denormalized_filters[0].name, "customer_id");
1095        assert_eq!(metadata.denormalized_filters[1].name, "occurred_at");
1096    }
1097
1098    #[test]
1099    fn test_sql_type_from_str_postgres() {
1100        assert_eq!(SqlType::from_str_postgres("integer"), SqlType::Int);
1101        assert_eq!(SqlType::from_str_postgres("BIGINT"), SqlType::BigInt);
1102        assert_eq!(SqlType::from_str_postgres("decimal"), SqlType::Decimal);
1103        assert_eq!(SqlType::from_str_postgres("FLOAT"), SqlType::Float);
1104        assert_eq!(SqlType::from_str_postgres("jsonb"), SqlType::Jsonb);
1105        assert_eq!(SqlType::from_str_postgres("text"), SqlType::Text);
1106        assert_eq!(SqlType::from_str_postgres("uuid"), SqlType::Uuid);
1107        assert_eq!(SqlType::from_str_postgres("timestamptz"), SqlType::Timestamp);
1108    }
1109
1110    #[test]
1111    fn test_sql_type_from_str_mysql() {
1112        assert_eq!(SqlType::from_str_mysql("INT"), SqlType::Int);
1113        assert_eq!(SqlType::from_str_mysql("bigint"), SqlType::BigInt);
1114        assert_eq!(SqlType::from_str_mysql("DECIMAL"), SqlType::Decimal);
1115        assert_eq!(SqlType::from_str_mysql("double"), SqlType::Float);
1116        assert_eq!(SqlType::from_str_mysql("json"), SqlType::Json);
1117        assert_eq!(SqlType::from_str_mysql("VARCHAR"), SqlType::Text);
1118    }
1119
1120    #[test]
1121    fn test_sql_type_from_str_sqlite() {
1122        assert_eq!(SqlType::from_str_sqlite("INTEGER"), SqlType::BigInt);
1123        assert_eq!(SqlType::from_str_sqlite("real"), SqlType::Float);
1124        assert_eq!(SqlType::from_str_sqlite("TEXT"), SqlType::Text);
1125    }
1126
1127    #[test]
1128    fn test_sql_type_from_str_sqlserver() {
1129        assert_eq!(SqlType::from_str_sqlserver("INT"), SqlType::Int);
1130        assert_eq!(SqlType::from_str_sqlserver("BIGINT"), SqlType::BigInt);
1131        assert_eq!(SqlType::from_str_sqlserver("decimal"), SqlType::Decimal);
1132        assert_eq!(SqlType::from_str_sqlserver("float"), SqlType::Float);
1133        assert_eq!(SqlType::from_str_sqlserver("NVARCHAR"), SqlType::Text);
1134        assert_eq!(SqlType::from_str_sqlserver("uniqueidentifier"), SqlType::Uuid);
1135    }
1136
1137    #[test]
1138    fn test_is_numeric_type() {
1139        assert!(FactTableDetector::is_numeric_type(&SqlType::Int));
1140        assert!(FactTableDetector::is_numeric_type(&SqlType::BigInt));
1141        assert!(FactTableDetector::is_numeric_type(&SqlType::Decimal));
1142        assert!(FactTableDetector::is_numeric_type(&SqlType::Float));
1143        assert!(!FactTableDetector::is_numeric_type(&SqlType::Text));
1144        assert!(!FactTableDetector::is_numeric_type(&SqlType::Jsonb));
1145        assert!(!FactTableDetector::is_numeric_type(&SqlType::Uuid));
1146    }
1147
1148    // =============================================================================
1149    // Calendar Dimension Tests
1150    // =============================================================================
1151
    #[test]
    fn test_detect_calendar_dimensions() {
        // Two calendar columns (date_info, month_info) alongside a timestamp
        // source column; the plain `data` JSONB column must not be treated as
        // a calendar column.
        let columns = vec![
            ("revenue".to_string(), "decimal".to_string(), false),
            ("data".to_string(), "jsonb".to_string(), false),
            ("date_info".to_string(), "jsonb".to_string(), false),
            ("month_info".to_string(), "jsonb".to_string(), false),
            ("occurred_at".to_string(), "timestamptz".to_string(), false),
        ];

        let indexed = std::collections::HashSet::new();
        let calendar_dims =
            FactTableDetector::detect_calendar_dimensions(&columns, &indexed).unwrap();

        // Both *_info columns fold into a single calendar dimension keyed by
        // the timestamp source column.
        assert_eq!(calendar_dims.len(), 1);
        assert_eq!(calendar_dims[0].source_column, "occurred_at");
        assert_eq!(calendar_dims[0].granularities.len(), 2); // date_info, month_info

        // Verify date_info buckets
        let date_info = &calendar_dims[0].granularities[0];
        assert_eq!(date_info.column_name, "date_info");
        assert_eq!(date_info.buckets.len(), 5); // day, week, month, quarter, year

        // Finest bucket first: the `date` key is a Day bucket typed `date`.
        assert_eq!(date_info.buckets[0].json_key, "date");
        assert_eq!(
            date_info.buckets[0].bucket_type,
            crate::compiler::aggregate_types::TemporalBucket::Day
        );
        assert_eq!(date_info.buckets[0].data_type, "date");

        // Verify month_info buckets
        let month_info = &calendar_dims[0].granularities[1];
        assert_eq!(month_info.column_name, "month_info");
        assert_eq!(month_info.buckets.len(), 3); // month, quarter, year
    }
1187
1188    #[test]
1189    fn test_infer_calendar_buckets_date_info() {
1190        let buckets = FactTableDetector::infer_calendar_buckets("date_info");
1191        assert_eq!(buckets.len(), 5);
1192
1193        assert_eq!(buckets[0].json_key, "date");
1194        assert_eq!(buckets[0].bucket_type, crate::compiler::aggregate_types::TemporalBucket::Day);
1195
1196        assert_eq!(buckets[1].json_key, "week");
1197        assert_eq!(buckets[1].bucket_type, crate::compiler::aggregate_types::TemporalBucket::Week);
1198
1199        assert_eq!(buckets[2].json_key, "month");
1200        assert_eq!(buckets[2].bucket_type, crate::compiler::aggregate_types::TemporalBucket::Month);
1201
1202        assert_eq!(buckets[3].json_key, "quarter");
1203        assert_eq!(
1204            buckets[3].bucket_type,
1205            crate::compiler::aggregate_types::TemporalBucket::Quarter
1206        );
1207
1208        assert_eq!(buckets[4].json_key, "year");
1209        assert_eq!(buckets[4].bucket_type, crate::compiler::aggregate_types::TemporalBucket::Year);
1210    }
1211
1212    #[test]
1213    fn test_infer_calendar_buckets_month_info() {
1214        let buckets = FactTableDetector::infer_calendar_buckets("month_info");
1215        assert_eq!(buckets.len(), 3);
1216
1217        assert_eq!(buckets[0].json_key, "month");
1218        assert_eq!(buckets[1].json_key, "quarter");
1219        assert_eq!(buckets[2].json_key, "year");
1220    }
1221
1222    #[test]
1223    fn test_infer_calendar_buckets_year_info() {
1224        let buckets = FactTableDetector::infer_calendar_buckets("year_info");
1225        assert_eq!(buckets.len(), 1);
1226
1227        assert_eq!(buckets[0].json_key, "year");
1228        assert_eq!(buckets[0].bucket_type, crate::compiler::aggregate_types::TemporalBucket::Year);
1229    }
1230
1231    #[test]
1232    fn test_infer_calendar_buckets_unknown() {
1233        let buckets = FactTableDetector::infer_calendar_buckets("unknown_info");
1234        assert_eq!(buckets.len(), 0);
1235    }
1236
1237    #[test]
1238    fn test_no_calendar_columns() {
1239        let columns = vec![
1240            ("revenue".to_string(), "decimal".to_string(), false),
1241            ("occurred_at".to_string(), "timestamptz".to_string(), false),
1242        ];
1243
1244        let indexed = std::collections::HashSet::new();
1245        let calendar_dims =
1246            FactTableDetector::detect_calendar_dimensions(&columns, &indexed).unwrap();
1247
1248        assert_eq!(calendar_dims.len(), 0); // No calendar columns detected
1249    }
1250
1251    #[test]
1252    fn test_calendar_detection_json_type() {
1253        // Test MySQL/SQLite JSON type (not just PostgreSQL JSONB)
1254        let columns = vec![
1255            ("revenue".to_string(), "decimal".to_string(), false),
1256            ("date_info".to_string(), "json".to_string(), false), // MySQL/SQLite
1257            ("occurred_at".to_string(), "timestamp".to_string(), false),
1258        ];
1259
1260        let indexed = std::collections::HashSet::new();
1261        let calendar_dims =
1262            FactTableDetector::detect_calendar_dimensions(&columns, &indexed).unwrap();
1263
1264        assert_eq!(calendar_dims.len(), 1);
1265        assert_eq!(calendar_dims[0].granularities.len(), 1); // date_info
1266        assert_eq!(calendar_dims[0].granularities[0].column_name, "date_info");
1267    }
1268
1269    #[test]
1270    fn test_single_date_info_column() {
1271        // Test that a single date_info column is detected and used
1272        let columns = vec![
1273            ("revenue".to_string(), "decimal".to_string(), false),
1274            ("data".to_string(), "jsonb".to_string(), false),
1275            ("date_info".to_string(), "jsonb".to_string(), false), // Only this calendar column
1276            ("occurred_at".to_string(), "timestamptz".to_string(), false),
1277        ];
1278
1279        let indexed = std::collections::HashSet::new();
1280        let calendar_dims =
1281            FactTableDetector::detect_calendar_dimensions(&columns, &indexed).unwrap();
1282
1283        assert_eq!(calendar_dims.len(), 1);
1284        assert_eq!(calendar_dims[0].source_column, "occurred_at");
1285        assert_eq!(calendar_dims[0].granularities.len(), 1); // Only date_info
1286
1287        // Verify date_info provides all 5 buckets
1288        let date_info = &calendar_dims[0].granularities[0];
1289        assert_eq!(date_info.column_name, "date_info");
1290        assert_eq!(date_info.buckets.len(), 5); // day, week, month, quarter, year
1291
1292        // Can query any of these buckets from the single date_info column
1293        assert_eq!(date_info.buckets[0].json_key, "date"); // day bucket
1294        assert_eq!(date_info.buckets[1].json_key, "week"); // week bucket
1295        assert_eq!(date_info.buckets[2].json_key, "month"); // month bucket
1296        assert_eq!(date_info.buckets[3].json_key, "quarter"); // quarter bucket
1297        assert_eq!(date_info.buckets[4].json_key, "year"); // year bucket
1298    }
1299
1300    // =============================================================================
1301    // Test Helpers
1302    // =============================================================================
1303
    /// Helper to find a path by name; returns `None` instead of panicking so
    /// callers can surface a descriptive failure via `.expect(...)`.
    fn find_path_by_name<'a>(paths: &'a [DimensionPath], name: &str) -> Option<&'a DimensionPath> {
        paths.iter().find(|p| p.name == name)
    }
1308
1309    // =============================================================================
1310    // Dimension Path Extraction Tests
1311    // =============================================================================
1312
1313    #[test]
1314    fn test_extract_dimension_paths_simple() {
1315        let sample = serde_json::json!({
1316            "category": "electronics",
1317            "region": "north",
1318            "priority": 1
1319        });
1320
1321        let paths = FactTableDetector::extract_dimension_paths(
1322            &sample,
1323            "dimensions",
1324            DatabaseType::PostgreSQL,
1325        );
1326
1327        assert_eq!(paths.len(), 3);
1328
1329        // Check category path
1330        let category = find_path_by_name(&paths, "category").expect("category path");
1331        assert_eq!(category.json_path, "dimensions->>'category'");
1332        assert_eq!(category.data_type, "string");
1333
1334        // Check region path
1335        let region = find_path_by_name(&paths, "region").expect("region path");
1336        assert_eq!(region.json_path, "dimensions->>'region'");
1337        assert_eq!(region.data_type, "string");
1338
1339        // Check priority path (integer)
1340        let priority = find_path_by_name(&paths, "priority").expect("priority path");
1341        assert_eq!(priority.json_path, "dimensions->>'priority'");
1342        assert_eq!(priority.data_type, "integer");
1343    }
1344
1345    #[test]
1346    fn test_extract_dimension_paths_nested() {
1347        let sample = serde_json::json!({
1348            "customer": {
1349                "region": "north",
1350                "tier": "gold"
1351            },
1352            "product": "laptop"
1353        });
1354
1355        let paths =
1356            FactTableDetector::extract_dimension_paths(&sample, "data", DatabaseType::PostgreSQL);
1357
1358        // Should have: customer (object), customer_region, customer_tier, product
1359        assert!(paths.iter().any(|p| p.name == "customer"));
1360        assert!(paths.iter().any(|p| p.name == "customer_region"));
1361        assert!(paths.iter().any(|p| p.name == "customer_tier"));
1362        assert!(paths.iter().any(|p| p.name == "product"));
1363
1364        // Check nested path syntax
1365        let customer_region =
1366            find_path_by_name(&paths, "customer_region").expect("customer_region path");
1367        assert_eq!(customer_region.json_path, "data->'customer'->>'region'");
1368    }
1369
1370    #[test]
1371    fn test_extract_dimension_paths_various_types() {
1372        let sample = serde_json::json!({
1373            "name": "test",
1374            "count": 42,
1375            "price": 19.99,
1376            "active": true,
1377            "tags": ["a", "b"],
1378            "metadata": {}
1379        });
1380
1381        let paths = FactTableDetector::extract_dimension_paths(
1382            &sample,
1383            "dimensions",
1384            DatabaseType::PostgreSQL,
1385        );
1386
1387        // Check type inference
1388        let name = paths.iter().find(|p| p.name == "name").unwrap();
1389        assert_eq!(name.data_type, "string");
1390
1391        let count = paths.iter().find(|p| p.name == "count").unwrap();
1392        assert_eq!(count.data_type, "integer");
1393
1394        let price = paths.iter().find(|p| p.name == "price").unwrap();
1395        assert_eq!(price.data_type, "float");
1396
1397        let active = paths.iter().find(|p| p.name == "active").unwrap();
1398        assert_eq!(active.data_type, "boolean");
1399
1400        let tags = paths.iter().find(|p| p.name == "tags").unwrap();
1401        assert_eq!(tags.data_type, "array");
1402
1403        let metadata = paths.iter().find(|p| p.name == "metadata").unwrap();
1404        assert_eq!(metadata.data_type, "object");
1405    }
1406
1407    #[test]
1408    fn test_generate_json_path_postgres() {
1409        // Top-level
1410        assert_eq!(
1411            FactTableDetector::generate_json_path(
1412                "dimensions",
1413                "category",
1414                DatabaseType::PostgreSQL
1415            ),
1416            "dimensions->>'category'"
1417        );
1418
1419        // Nested
1420        assert_eq!(
1421            FactTableDetector::generate_json_path(
1422                "data",
1423                "customer.region",
1424                DatabaseType::PostgreSQL
1425            ),
1426            "data->'customer'->>'region'"
1427        );
1428
1429        // Deeply nested
1430        assert_eq!(
1431            FactTableDetector::generate_json_path("data", "a.b.c", DatabaseType::PostgreSQL),
1432            "data->'a'->'b'->>'c'"
1433        );
1434    }
1435
    #[test]
    fn test_generate_json_path_mysql() {
        // NOTE(review): both expected strings are missing the closing `)` for
        // JSON_UNQUOTE (two opening parens, one closing). If these assertions
        // pass, the MySQL path generator emits unbalanced SQL — confirm and
        // fix `generate_json_path` together with these literals.
        assert_eq!(
            FactTableDetector::generate_json_path("dimensions", "category", DatabaseType::MySQL),
            "JSON_UNQUOTE(JSON_EXTRACT(dimensions, '$.category')"
        );

        assert_eq!(
            FactTableDetector::generate_json_path("data", "customer.region", DatabaseType::MySQL),
            "JSON_UNQUOTE(JSON_EXTRACT(data, '$.customer.region')"
        );
    }
1448
1449    #[test]
1450    fn test_generate_json_path_sqlite() {
1451        assert_eq!(
1452            FactTableDetector::generate_json_path("dimensions", "category", DatabaseType::SQLite),
1453            "json_extract(dimensions, '$.category')"
1454        );
1455
1456        assert_eq!(
1457            FactTableDetector::generate_json_path("data", "customer.region", DatabaseType::SQLite),
1458            "json_extract(data, '$.customer.region')"
1459        );
1460    }
1461
1462    #[test]
1463    fn test_generate_json_path_sqlserver() {
1464        assert_eq!(
1465            FactTableDetector::generate_json_path(
1466                "dimensions",
1467                "category",
1468                DatabaseType::SQLServer
1469            ),
1470            "JSON_VALUE(dimensions, '$.category')"
1471        );
1472
1473        assert_eq!(
1474            FactTableDetector::generate_json_path(
1475                "data",
1476                "customer.region",
1477                DatabaseType::SQLServer
1478            ),
1479            "JSON_VALUE(data, '$.customer.region')"
1480        );
1481    }
1482
1483    #[test]
1484    fn test_infer_json_type() {
1485        assert_eq!(FactTableDetector::infer_json_type(&serde_json::json!(null)), "string");
1486        assert_eq!(FactTableDetector::infer_json_type(&serde_json::json!(true)), "boolean");
1487        assert_eq!(FactTableDetector::infer_json_type(&serde_json::json!(42)), "integer");
1488        assert_eq!(FactTableDetector::infer_json_type(&serde_json::json!(1.5)), "float");
1489        assert_eq!(FactTableDetector::infer_json_type(&serde_json::json!("hello")), "string");
1490        assert_eq!(FactTableDetector::infer_json_type(&serde_json::json!([1, 2, 3])), "array");
1491        assert_eq!(FactTableDetector::infer_json_type(&serde_json::json!({"a": 1})), "object");
1492    }
1493
1494    #[test]
1495    fn test_extract_paths_depth_limit() {
1496        // Create deeply nested structure
1497        let sample = serde_json::json!({
1498            "level1": {
1499                "level2": {
1500                    "level3": {
1501                        "level4": {
1502                            "level5": "too deep"
1503                        }
1504                    }
1505                }
1506            }
1507        });
1508
1509        let paths =
1510            FactTableDetector::extract_dimension_paths(&sample, "data", DatabaseType::PostgreSQL);
1511
1512        // Should stop at depth 3 (level1, level2, level3, level4 but not level5)
1513        assert!(paths.iter().any(|p| p.name == "level1"));
1514        assert!(paths.iter().any(|p| p.name == "level1_level2"));
1515        assert!(paths.iter().any(|p| p.name == "level1_level2_level3"));
1516        assert!(paths.iter().any(|p| p.name == "level1_level2_level3_level4"));
1517        // level5 should NOT be extracted due to depth limit
1518        assert!(!paths.iter().any(|p| p.name.contains("level5")));
1519    }
1520
1521    #[test]
1522    fn test_extract_paths_empty_object() {
1523        let sample = serde_json::json!({});
1524        let paths = FactTableDetector::extract_dimension_paths(
1525            &sample,
1526            "dimensions",
1527            DatabaseType::PostgreSQL,
1528        );
1529        assert!(paths.is_empty());
1530    }
1531
1532    #[test]
1533    fn test_extract_paths_non_object() {
1534        // Array at root level
1535        let sample = serde_json::json!([1, 2, 3]);
1536        let paths = FactTableDetector::extract_dimension_paths(
1537            &sample,
1538            "dimensions",
1539            DatabaseType::PostgreSQL,
1540        );
1541        assert!(paths.is_empty());
1542
1543        // Scalar at root level
1544        let sample = serde_json::json!("just a string");
1545        let paths = FactTableDetector::extract_dimension_paths(
1546            &sample,
1547            "dimensions",
1548            DatabaseType::PostgreSQL,
1549        );
1550        assert!(paths.is_empty());
1551    }
1552
1553    // ==================== Explicit Fact Table Declaration Tests ====================
1554
1555    #[test]
1556    fn test_aggregation_strategy_serialization() {
1557        // Test incremental
1558        let incremental_json = serde_json::json!("incremental");
1559        let strategy: AggregationStrategy = serde_json::from_value(incremental_json).unwrap();
1560        assert_eq!(strategy, AggregationStrategy::Incremental);
1561
1562        // Test accumulating_snapshot
1563        let accum_json = serde_json::json!("accumulating_snapshot");
1564        let strategy: AggregationStrategy = serde_json::from_value(accum_json).unwrap();
1565        assert_eq!(strategy, AggregationStrategy::AccumulatingSnapshot);
1566
1567        // Test periodic_snapshot
1568        let periodic_json = serde_json::json!("periodic_snapshot");
1569        let strategy: AggregationStrategy = serde_json::from_value(periodic_json).unwrap();
1570        assert_eq!(strategy, AggregationStrategy::PeriodicSnapshot);
1571    }
1572
1573    #[test]
1574    fn test_aggregation_strategy_default() {
1575        let strategy = AggregationStrategy::default();
1576        assert_eq!(strategy, AggregationStrategy::Incremental);
1577    }
1578
1579    #[test]
1580    fn test_aggregation_strategy_equality() {
1581        assert_eq!(AggregationStrategy::Incremental, AggregationStrategy::Incremental);
1582        assert_ne!(AggregationStrategy::Incremental, AggregationStrategy::AccumulatingSnapshot);
1583    }
1584
1585    #[test]
1586    fn test_fact_table_declaration_basic() {
1587        let decl = FactTableDeclaration {
1588            name:        "tf_sales".to_string(),
1589            measures:    vec!["amount".to_string(), "quantity".to_string()],
1590            dimensions:  vec!["product_id".to_string(), "region_id".to_string()],
1591            primary_key: "id".to_string(),
1592            metadata:    None,
1593        };
1594
1595        assert_eq!(decl.name, "tf_sales");
1596        assert_eq!(decl.measures.len(), 2);
1597        assert_eq!(decl.dimensions.len(), 2);
1598        assert_eq!(decl.primary_key, "id");
1599        assert!(decl.metadata.is_none());
1600    }
1601
1602    #[test]
1603    fn test_fact_table_declaration_with_metadata() {
1604        let metadata = FactTableDeclarationMetadata {
1605            aggregation_strategy: AggregationStrategy::Incremental,
1606            grain: vec!["date".to_string(), "product".to_string()],
1607            snapshot_date_column: None,
1608            is_slowly_changing_dimension: false,
1609        };
1610
1611        let decl = FactTableDeclaration {
1612            name:        "tf_events".to_string(),
1613            measures:    vec!["count".to_string()],
1614            dimensions:  vec!["user_id".to_string(), "event_type".to_string()],
1615            primary_key: "id".to_string(),
1616            metadata:    Some(metadata.clone()),
1617        };
1618
1619        assert!(decl.metadata.is_some());
1620        let meta = decl.metadata.unwrap();
1621        assert_eq!(meta.aggregation_strategy, AggregationStrategy::Incremental);
1622        assert_eq!(meta.grain.len(), 2);
1623    }
1624
1625    #[test]
1626    fn test_fact_table_declaration_periodic_snapshot() {
1627        let metadata = FactTableDeclarationMetadata {
1628            aggregation_strategy: AggregationStrategy::PeriodicSnapshot,
1629            grain: vec!["date".to_string()],
1630            snapshot_date_column: Some("snapshot_date".to_string()),
1631            is_slowly_changing_dimension: false,
1632        };
1633
1634        let decl = FactTableDeclaration {
1635            name:        "tf_inventory".to_string(),
1636            measures:    vec!["quantity_on_hand".to_string()],
1637            dimensions:  vec!["warehouse_id".to_string()],
1638            primary_key: "id".to_string(),
1639            metadata:    Some(metadata.clone()),
1640        };
1641
1642        assert_eq!(decl.name, "tf_inventory");
1643        let meta = decl.metadata.unwrap();
1644        assert_eq!(meta.aggregation_strategy, AggregationStrategy::PeriodicSnapshot);
1645        assert_eq!(meta.snapshot_date_column, Some("snapshot_date".to_string()));
1646    }
1647
1648    #[test]
1649    fn test_fact_table_declaration_json_serialization() {
1650        let json_str = r#"{
1651            "name": "tf_sales",
1652            "measures": ["amount", "quantity"],
1653            "dimensions": ["product_id"],
1654            "primary_key": "id",
1655            "metadata": {
1656                "aggregation_strategy": "incremental",
1657                "grain": ["date", "product"],
1658                "is_slowly_changing_dimension": false
1659            }
1660        }"#;
1661
1662        let decl: FactTableDeclaration = serde_json::from_str(json_str).unwrap();
1663
1664        assert_eq!(decl.name, "tf_sales");
1665        assert_eq!(decl.measures.len(), 2);
1666        assert!(decl.metadata.is_some());
1667
1668        let meta = decl.metadata.unwrap();
1669        assert_eq!(meta.aggregation_strategy, AggregationStrategy::Incremental);
1670    }
1671
1672    #[test]
1673    fn test_fact_table_declaration_json_roundtrip() {
1674        let original = FactTableDeclaration {
1675            name:        "tf_orders".to_string(),
1676            measures:    vec!["amount".to_string()],
1677            dimensions:  vec!["customer_id".to_string()],
1678            primary_key: "id".to_string(),
1679            metadata:    Some(FactTableDeclarationMetadata {
1680                aggregation_strategy: AggregationStrategy::AccumulatingSnapshot,
1681                grain: vec!["order_id".to_string()],
1682                snapshot_date_column: None,
1683                is_slowly_changing_dimension: false,
1684            }),
1685        };
1686
1687        // Serialize
1688        let json = serde_json::to_string(&original).unwrap();
1689
1690        // Deserialize
1691        let deserialized: FactTableDeclaration = serde_json::from_str(&json).unwrap();
1692
1693        // Verify roundtrip
1694        assert_eq!(original, deserialized);
1695    }
1696
1697    #[test]
1698    fn test_fact_table_declaration_metadata_default_strategy() {
1699        let json_str = r#"{
1700            "name": "tf_events",
1701            "measures": ["count"],
1702            "dimensions": ["event_type"],
1703            "primary_key": "id",
1704            "metadata": {
1705                "grain": ["date"]
1706            }
1707        }"#;
1708
1709        let decl: FactTableDeclaration = serde_json::from_str(json_str).unwrap();
1710        let meta = decl.metadata.unwrap();
1711
1712        // Should default to Incremental
1713        assert_eq!(meta.aggregation_strategy, AggregationStrategy::default());
1714    }
1715
1716    #[test]
1717    fn test_multiple_fact_table_declarations() {
1718        let declarations = [
1719            FactTableDeclaration {
1720                name:        "tf_sales".to_string(),
1721                measures:    vec!["amount".to_string()],
1722                dimensions:  vec!["product_id".to_string()],
1723                primary_key: "id".to_string(),
1724                metadata:    None,
1725            },
1726            FactTableDeclaration {
1727                name:        "tf_events".to_string(),
1728                measures:    vec!["count".to_string()],
1729                dimensions:  vec!["user_id".to_string()],
1730                primary_key: "id".to_string(),
1731                metadata:    None,
1732            },
1733        ];
1734
1735        assert_eq!(declarations.len(), 2);
1736        assert_eq!(declarations[0].name, "tf_sales");
1737        assert_eq!(declarations[1].name, "tf_events");
1738    }
1739
1740    #[test]
1741    fn test_fact_table_declaration_large_grain() {
1742        let metadata = FactTableDeclarationMetadata {
1743            aggregation_strategy: AggregationStrategy::Incremental,
1744            grain: vec![
1745                "date".to_string(),
1746                "product".to_string(),
1747                "region".to_string(),
1748                "customer".to_string(),
1749            ],
1750            snapshot_date_column: None,
1751            is_slowly_changing_dimension: false,
1752        };
1753
1754        let decl = FactTableDeclaration {
1755            name:        "tf_sales_detailed".to_string(),
1756            measures:    vec!["amount".to_string(), "quantity".to_string()],
1757            dimensions:  vec![
1758                "date_id".to_string(),
1759                "product_id".to_string(),
1760                "region_id".to_string(),
1761                "customer_id".to_string(),
1762            ],
1763            primary_key: "id".to_string(),
1764            metadata:    Some(metadata),
1765        };
1766
1767        let meta = decl.metadata.unwrap();
1768        assert_eq!(meta.grain.len(), 4);
1769        assert_eq!(decl.dimensions.len(), 4);
1770    }
1771}