Skip to main content

fraiseql_cli/commands/
validate_facts.rs

1//! Validate that declared fact tables match database schema.
2//!
3//! This command checks:
4//! - Declared fact tables exist in database
5//! - Metadata structure matches actual database schema
6//! - Warns about undeclared tf_* tables
7//!
8//! **Purpose**: CI/CD validation step to catch schema drift.
9
10use std::{collections::HashSet, fs, path::Path};
11
12use anyhow::Result;
13use deadpool_postgres::{Config, ManagerConfig, RecyclingMethod, Runtime};
14use fraiseql_core::{
15    compiler::{
16        fact_table::{DatabaseIntrospector, FactTableDetector, FactTableMetadata},
17        ir::AuthoringIR,
18        parser::SchemaParser,
19    },
20    db::PostgresIntrospector,
21};
22use tokio_postgres::NoTls;
23
24/// Validation error type.
25#[derive(Debug)]
26pub struct ValidationIssue {
27    /// Issue type (error or warning)
28    pub severity:   IssueSeverity,
29    /// Fact table name
30    pub table_name: String,
31    /// Issue description
32    pub message:    String,
33}
34
35/// Issue severity level.
36#[derive(Debug, Clone, Copy, PartialEq, Eq)]
37pub enum IssueSeverity {
38    /// Critical error - validation fails
39    Error,
40    /// Warning - validation passes with warnings
41    Warning,
42}
43
44impl ValidationIssue {
45    /// Create a new error issue.
46    pub const fn error(table_name: String, message: String) -> Self {
47        Self {
48            severity: IssueSeverity::Error,
49            table_name,
50            message,
51        }
52    }
53
54    /// Create a new warning issue.
55    pub const fn warning(table_name: String, message: String) -> Self {
56        Self {
57            severity: IssueSeverity::Warning,
58            table_name,
59            message,
60        }
61    }
62}
63
64/// Create a PostgreSQL introspector from a database URL
65async fn create_introspector(database_url: &str) -> Result<PostgresIntrospector> {
66    let mut cfg = Config::new();
67    cfg.url = Some(database_url.to_string());
68    cfg.manager = Some(ManagerConfig {
69        recycling_method: RecyclingMethod::Fast,
70    });
71    cfg.pool = Some(deadpool_postgres::PoolConfig::new(2));
72
73    let pool = cfg
74        .create_pool(Some(Runtime::Tokio1), NoTls)
75        .map_err(|e| anyhow::anyhow!("Failed to create database pool: {e}"))?;
76
77    // Test connection
78    let _client = pool
79        .get()
80        .await
81        .map_err(|e| anyhow::anyhow!("Failed to connect to database: {e}"))?;
82
83    Ok(PostgresIntrospector::new(pool))
84}
85
86/// Validate that declared fact tables match database schema.
87///
88/// # Arguments
89///
90/// * `schema_path` - Path to schema.json file
91/// * `database_url` - Database connection string
92///
93/// # Returns
94///
95/// Success if all validations pass, error otherwise
96///
97/// # Example
98///
99/// ```bash
100/// fraiseql validate facts --schema schema.json --database postgresql://localhost/mydb
101/// ```
102pub async fn run(schema_path: &Path, database_url: &str) -> Result<()> {
103    eprintln!("🔍 Validating fact tables...");
104    eprintln!("   Schema: {}", schema_path.display());
105    eprintln!("   Database: {database_url}");
106    eprintln!();
107
108    // 1. Load and parse schema
109    let schema_str = fs::read_to_string(schema_path)?;
110
111    let parser = SchemaParser::new();
112    let ir: AuthoringIR = parser.parse(&schema_str)?;
113
114    let declared_tables: HashSet<String> = ir.fact_tables.keys().cloned().collect();
115
116    eprintln!("📋 Found {} declared fact table(s) in schema", declared_tables.len());
117
118    if declared_tables.is_empty() {
119        eprintln!("   No fact tables declared - nothing to validate");
120        eprintln!();
121        eprintln!("💡 Tip: Use 'fraiseql introspect facts' to discover fact tables");
122        return Ok(());
123    }
124
125    for table_name in &declared_tables {
126        eprintln!("   - {table_name}");
127    }
128    eprintln!();
129
130    // 2. Connect to database and list actual fact tables
131    let introspector = create_introspector(database_url).await?;
132
133    let actual_tables: HashSet<String> = introspector
134        .list_fact_tables()
135        .await
136        .map_err(|e| anyhow::anyhow!("Failed to list fact tables: {e}"))?
137        .into_iter()
138        .collect();
139
140    eprintln!("📊 Found {} fact table(s) in database", actual_tables.len());
141    eprintln!();
142
143    // 3. Validate each declared table
144    let mut issues: Vec<ValidationIssue> = Vec::new();
145    let mut validated_count = 0;
146
147    for table_name in &declared_tables {
148        eprintln!("   Validating {table_name}...");
149
150        // Check if table exists in database
151        if !actual_tables.contains(table_name) {
152            issues.push(ValidationIssue::error(
153                table_name.clone(),
154                "Table does not exist in database".to_string(),
155            ));
156            continue;
157        }
158
159        // Introspect actual table structure
160        match FactTableDetector::introspect(&introspector, table_name).await {
161            Ok(actual_metadata) => {
162                // Get declared metadata
163                if let Some(declared_json) = ir.fact_tables.get(table_name) {
164                    // Compare structures
165                    let comparison_issues =
166                        compare_metadata(table_name, declared_json, &actual_metadata);
167                    issues.extend(comparison_issues);
168                }
169                validated_count += 1;
170            },
171            Err(e) => {
172                issues.push(ValidationIssue::error(
173                    table_name.clone(),
174                    format!("Failed to introspect: {e}"),
175                ));
176            },
177        }
178    }
179
180    // 4. Check for undeclared tables in database
181    for table_name in &actual_tables {
182        if !declared_tables.contains(table_name) {
183            issues.push(ValidationIssue::warning(
184                table_name.clone(),
185                "Table exists in database but not declared in schema".to_string(),
186            ));
187        }
188    }
189
190    // 5. Report results
191    eprintln!();
192    let errors: Vec<&ValidationIssue> =
193        issues.iter().filter(|i| i.severity == IssueSeverity::Error).collect();
194    let warnings: Vec<&ValidationIssue> =
195        issues.iter().filter(|i| i.severity == IssueSeverity::Warning).collect();
196
197    if !errors.is_empty() {
198        eprintln!("❌ Errors ({}):", errors.len());
199        for issue in &errors {
200            eprintln!("   {} - {}", issue.table_name, issue.message);
201        }
202        eprintln!();
203    }
204
205    if !warnings.is_empty() {
206        eprintln!("⚠️  Warnings ({}):", warnings.len());
207        for issue in &warnings {
208            eprintln!("   {} - {}", issue.table_name, issue.message);
209        }
210        eprintln!();
211    }
212
213    if errors.is_empty() {
214        eprintln!("✅ Validation passed");
215        eprintln!("   {validated_count} table(s) validated successfully");
216        if !warnings.is_empty() {
217            eprintln!("   {} warning(s)", warnings.len());
218        }
219        Ok(())
220    } else {
221        Err(anyhow::anyhow!("Validation failed with {} error(s)", errors.len()))
222    }
223}
224
225/// Compare declared metadata with actual database metadata
226fn compare_metadata(
227    table_name: &str,
228    declared: &serde_json::Value,
229    actual: &FactTableMetadata,
230) -> Vec<ValidationIssue> {
231    let mut issues = Vec::new();
232
233    // Extract declared measures
234    if let Some(declared_measures) = declared.get("measures").and_then(|m| m.as_array()) {
235        let declared_measure_names: HashSet<String> = declared_measures
236            .iter()
237            .filter_map(|m| m.get("name").and_then(|n| n.as_str()))
238            .map(String::from)
239            .collect();
240
241        let actual_measure_names: HashSet<String> =
242            actual.measures.iter().map(|m| m.name.clone()).collect();
243
244        // Check for missing measures in actual
245        for name in &declared_measure_names {
246            if !actual_measure_names.contains(name) {
247                issues.push(ValidationIssue::error(
248                    table_name.to_string(),
249                    format!("Declared measure '{name}' not found in database"),
250                ));
251            }
252        }
253
254        // Check for extra measures in actual (warning)
255        for name in &actual_measure_names {
256            if !declared_measure_names.contains(name) {
257                issues.push(ValidationIssue::warning(
258                    table_name.to_string(),
259                    format!("Database has measure '{name}' not declared in schema"),
260                ));
261            }
262        }
263
264        // Validate measure types
265        for declared_measure in declared_measures {
266            if let (Some(name), Some(declared_type)) = (
267                declared_measure.get("name").and_then(|n| n.as_str()),
268                declared_measure.get("sql_type").and_then(|t| t.as_str()),
269            ) {
270                if let Some(actual_measure) = actual.measures.iter().find(|m| m.name == name) {
271                    let actual_type_str = format!("{:?}", actual_measure.sql_type);
272                    if !types_compatible(declared_type, &actual_type_str) {
273                        issues.push(ValidationIssue::warning(
274                            table_name.to_string(),
275                            format!(
276                                "Measure '{name}' type mismatch: declared '{declared_type}', actual '{actual_type_str}'"
277                            ),
278                        ));
279                    }
280                }
281            }
282        }
283    }
284
285    // Validate dimensions column
286    if let Some(declared_dims) = declared.get("dimensions") {
287        if let Some(declared_name) = declared_dims.get("name").and_then(|n| n.as_str()) {
288            if declared_name != actual.dimensions.name {
289                issues.push(ValidationIssue::error(
290                    table_name.to_string(),
291                    format!(
292                        "Dimensions column mismatch: declared '{}', actual '{}'",
293                        declared_name, actual.dimensions.name
294                    ),
295                ));
296            }
297        }
298    }
299
300    // Validate denormalized filters
301    if let Some(declared_filters) = declared.get("denormalized_filters").and_then(|f| f.as_array())
302    {
303        let declared_filter_names: HashSet<String> = declared_filters
304            .iter()
305            .filter_map(|f| f.get("name").and_then(|n| n.as_str()))
306            .map(String::from)
307            .collect();
308
309        let actual_filter_names: HashSet<String> =
310            actual.denormalized_filters.iter().map(|f| f.name.clone()).collect();
311
312        for name in &declared_filter_names {
313            if !actual_filter_names.contains(name) {
314                issues.push(ValidationIssue::warning(
315                    table_name.to_string(),
316                    format!("Declared filter '{name}' not found in database"),
317                ));
318            }
319        }
320    }
321
322    issues
323}
324
325/// Check if two SQL types are compatible
326fn types_compatible(declared: &str, actual: &str) -> bool {
327    let declared_lower = declared.to_lowercase();
328    let actual_lower = actual.to_lowercase();
329
330    // Exact match
331    if declared_lower == actual_lower {
332        return true;
333    }
334
335    // Common aliases
336    let aliases: &[(&[&str], &[&str])] = &[
337        (&["int", "integer", "int4"], &["int", "integer", "int4"]),
338        (&["bigint", "int8"], &["bigint", "int8"]),
339        (&["decimal", "numeric", "money"], &["decimal", "numeric", "money"]),
340        (&["float", "double", "real", "float8"], &["float", "double", "real", "float8"]),
341        (&["text", "varchar", "string"], &["text", "varchar", "string"]),
342        (&["uuid"], &["uuid"]),
343        (
344            &["timestamp", "timestamptz", "datetime"],
345            &["timestamp", "timestamptz", "datetime"],
346        ),
347        (&["json", "jsonb"], &["json", "jsonb"]),
348        (&["bool", "boolean"], &["bool", "boolean"]),
349    ];
350
351    for (group1, group2) in aliases {
352        let in_group1 = group1.iter().any(|t| declared_lower.contains(t));
353        let in_group2 = group2.iter().any(|t| actual_lower.contains(t));
354        if in_group1 && in_group2 {
355            return true;
356        }
357    }
358
359    false
360}
361
362#[cfg(test)]
363mod tests {
364    use fraiseql_core::compiler::fact_table::{
365        DimensionColumn, FilterColumn, MeasureColumn, SqlType,
366    };
367
368    use super::*;
369
370    #[test]
371    fn test_validation_issue_error() {
372        let issue = ValidationIssue::error("tf_sales".to_string(), "Table not found".to_string());
373        assert_eq!(issue.severity, IssueSeverity::Error);
374        assert_eq!(issue.table_name, "tf_sales");
375    }
376
377    #[test]
378    fn test_validation_issue_warning() {
379        let issue = ValidationIssue::warning(
380            "tf_orders".to_string(),
381            "Table exists but not declared".to_string(),
382        );
383        assert_eq!(issue.severity, IssueSeverity::Warning);
384    }
385
386    #[test]
387    fn test_types_compatible() {
388        // Exact match
389        assert!(types_compatible("Int", "Int"));
390        assert!(types_compatible("Decimal", "Decimal"));
391
392        // Aliases
393        assert!(types_compatible("integer", "Int"));
394        assert!(types_compatible("int4", "Int"));
395        assert!(types_compatible("bigint", "BigInt"));
396        assert!(types_compatible("numeric", "Decimal"));
397        assert!(types_compatible("float", "Float"));
398        assert!(types_compatible("double", "Float"));
399        assert!(types_compatible("text", "Text"));
400        assert!(types_compatible("varchar", "Text"));
401
402        // Incompatible
403        assert!(!types_compatible("Int", "Text"));
404        assert!(!types_compatible("Decimal", "Boolean"));
405    }
406
407    #[test]
408    fn test_compare_metadata_matching() {
409        let declared = serde_json::json!({
410            "measures": [
411                {"name": "revenue", "sql_type": "Decimal"},
412                {"name": "quantity", "sql_type": "Int"}
413            ],
414            "dimensions": {"name": "data"},
415            "denormalized_filters": [
416                {"name": "customer_id"}
417            ]
418        });
419
420        let actual = FactTableMetadata {
421            table_name:           "tf_sales".to_string(),
422            measures:             vec![
423                MeasureColumn {
424                    name:     "revenue".to_string(),
425                    sql_type: SqlType::Decimal,
426                    nullable: false,
427                },
428                MeasureColumn {
429                    name:     "quantity".to_string(),
430                    sql_type: SqlType::Int,
431                    nullable: false,
432                },
433            ],
434            dimensions:           DimensionColumn {
435                name:  "data".to_string(),
436                paths: vec![],
437            },
438            denormalized_filters: vec![FilterColumn {
439                name:     "customer_id".to_string(),
440                sql_type: SqlType::Uuid,
441                indexed:  true,
442            }],
443            calendar_dimensions:  vec![],
444        };
445
446        let issues = compare_metadata("tf_sales", &declared, &actual);
447
448        // No errors expected for matching metadata
449        let errors: Vec<_> = issues.iter().filter(|i| i.severity == IssueSeverity::Error).collect();
450        assert!(errors.is_empty(), "Unexpected errors: {errors:?}");
451    }
452
453    #[test]
454    fn test_compare_metadata_missing_measure() {
455        let declared = serde_json::json!({
456            "measures": [
457                {"name": "revenue", "sql_type": "Decimal"},
458                {"name": "profit", "sql_type": "Decimal"}  // Not in actual
459            ],
460            "dimensions": {"name": "data"}
461        });
462
463        let actual = FactTableMetadata {
464            table_name:           "tf_sales".to_string(),
465            measures:             vec![MeasureColumn {
466                name:     "revenue".to_string(),
467                sql_type: SqlType::Decimal,
468                nullable: false,
469            }],
470            dimensions:           DimensionColumn {
471                name:  "data".to_string(),
472                paths: vec![],
473            },
474            denormalized_filters: vec![],
475            calendar_dimensions:  vec![],
476        };
477
478        let issues = compare_metadata("tf_sales", &declared, &actual);
479
480        // Should have error for missing 'profit' measure
481        let errors: Vec<_> = issues.iter().filter(|i| i.severity == IssueSeverity::Error).collect();
482        assert_eq!(errors.len(), 1);
483        assert!(errors[0].message.contains("profit"));
484    }
485}