Skip to main content

fraiseql_cli/commands/
validate_facts.rs

1//! Validate that declared fact tables match database schema.
2//!
3//! This command checks:
4//! - Declared fact tables exist in database
5//! - Metadata structure matches actual database schema
6//! - Warns about undeclared tf_* tables
7//!
8//! **Purpose**: CI/CD validation step to catch schema drift.
9
10use std::{collections::HashSet, fs, path::Path};
11
12use anyhow::Result;
13use deadpool_postgres::{Config, ManagerConfig, RecyclingMethod, Runtime};
14use fraiseql_core::{
15    compiler::{
16        fact_table::{DatabaseIntrospector, FactTableDetector, FactTableMetadata},
17        ir::AuthoringIR,
18        parser::SchemaParser,
19    },
20    db::PostgresIntrospector,
21};
22use tokio_postgres::NoTls;
23
24/// Validation error type.
25#[derive(Debug)]
26pub struct ValidationIssue {
27    /// Issue type (error or warning)
28    pub severity:   IssueSeverity,
29    /// Fact table name
30    pub table_name: String,
31    /// Issue description
32    pub message:    String,
33}
34
35/// Issue severity level.
36#[derive(Debug, Clone, Copy, PartialEq, Eq)]
37pub enum IssueSeverity {
38    /// Critical error - validation fails
39    Error,
40    /// Warning - validation passes with warnings
41    Warning,
42}
43
44impl ValidationIssue {
45    /// Create a new error issue.
46    pub const fn error(table_name: String, message: String) -> Self {
47        Self {
48            severity: IssueSeverity::Error,
49            table_name,
50            message,
51        }
52    }
53
54    /// Create a new warning issue.
55    pub const fn warning(table_name: String, message: String) -> Self {
56        Self {
57            severity: IssueSeverity::Warning,
58            table_name,
59            message,
60        }
61    }
62}
63
64/// Create a PostgreSQL introspector from a database URL
65async fn create_introspector(database_url: &str) -> Result<PostgresIntrospector> {
66    let mut cfg = Config::new();
67    cfg.url = Some(database_url.to_string());
68    cfg.manager = Some(ManagerConfig {
69        recycling_method: RecyclingMethod::Fast,
70    });
71    cfg.pool = Some(deadpool_postgres::PoolConfig::new(2));
72
73    let pool = cfg
74        .create_pool(Some(Runtime::Tokio1), NoTls)
75        .map_err(|e| anyhow::anyhow!("Failed to create database pool: {e}"))?;
76
77    // Test connection
78    let _client = pool
79        .get()
80        .await
81        .map_err(|e| anyhow::anyhow!("Failed to connect to database: {e}"))?;
82
83    Ok(PostgresIntrospector::new(pool))
84}
85
86/// Validate that declared fact tables match database schema.
87///
88/// # Arguments
89///
90/// * `schema_path` - Path to schema.json file
91/// * `database_url` - Database connection string
92///
93/// # Returns
94///
95/// Success if all validations pass, error otherwise
96///
97/// # Example
98///
99/// ```bash
100/// fraiseql validate facts --schema schema.json --database postgresql://localhost/mydb
101/// ```
102pub async fn run(schema_path: &Path, database_url: &str) -> Result<()> {
103    eprintln!("🔍 Validating fact tables...");
104    eprintln!("   Schema: {}", schema_path.display());
105    eprintln!("   Database: {database_url}");
106    eprintln!();
107
108    // 1. Load and parse schema
109    let schema_str = fs::read_to_string(schema_path)?;
110
111    let parser = SchemaParser::new();
112    let ir: AuthoringIR = parser.parse(&schema_str)?;
113
114    let declared_tables: HashSet<String> = ir.fact_tables.keys().cloned().collect();
115
116    eprintln!("📋 Found {} declared fact table(s) in schema", declared_tables.len());
117
118    if declared_tables.is_empty() {
119        eprintln!("   No fact tables declared - nothing to validate");
120        eprintln!();
121        eprintln!("💡 Tip: Use 'fraiseql introspect facts' to discover fact tables");
122        return Ok(());
123    }
124
125    for table_name in &declared_tables {
126        eprintln!("   - {table_name}");
127    }
128    eprintln!();
129
130    // 2. Connect to database and list actual fact tables
131    let introspector = create_introspector(database_url).await?;
132
133    let actual_tables: HashSet<String> = introspector
134        .list_fact_tables()
135        .await
136        .map_err(|e| anyhow::anyhow!("Failed to list fact tables: {e}"))?
137        .into_iter()
138        .collect();
139
140    eprintln!("📊 Found {} fact table(s) in database", actual_tables.len());
141    eprintln!();
142
143    // 3. Validate each declared table
144    let mut issues: Vec<ValidationIssue> = Vec::new();
145    let mut validated_count = 0;
146
147    for table_name in &declared_tables {
148        eprintln!("   Validating {table_name}...");
149
150        // Check if table exists in database
151        if !actual_tables.contains(table_name) {
152            issues.push(ValidationIssue::error(
153                table_name.clone(),
154                "Table does not exist in database".to_string(),
155            ));
156            continue;
157        }
158
159        // Introspect actual table structure
160        match FactTableDetector::introspect(&introspector, table_name).await {
161            Ok(actual_metadata) => {
162                // Get declared metadata
163                if let Some(declared_json) = ir.fact_tables.get(table_name) {
164                    // Compare structures
165                    let comparison_issues =
166                        compare_metadata(table_name, declared_json, &actual_metadata);
167                    issues.extend(comparison_issues);
168                }
169                validated_count += 1;
170            },
171            Err(e) => {
172                issues.push(ValidationIssue::error(
173                    table_name.clone(),
174                    format!("Failed to introspect: {e}"),
175                ));
176            },
177        }
178    }
179
180    // 4. Check for undeclared tables in database
181    for table_name in &actual_tables {
182        if !declared_tables.contains(table_name) {
183            issues.push(ValidationIssue::warning(
184                table_name.clone(),
185                "Table exists in database but not declared in schema".to_string(),
186            ));
187        }
188    }
189
190    // 5. Report results
191    eprintln!();
192    let errors: Vec<&ValidationIssue> =
193        issues.iter().filter(|i| i.severity == IssueSeverity::Error).collect();
194    let warnings: Vec<&ValidationIssue> =
195        issues.iter().filter(|i| i.severity == IssueSeverity::Warning).collect();
196
197    if !errors.is_empty() {
198        eprintln!("❌ Errors ({}):", errors.len());
199        for issue in &errors {
200            eprintln!("   {} - {}", issue.table_name, issue.message);
201        }
202        eprintln!();
203    }
204
205    if !warnings.is_empty() {
206        eprintln!("⚠️  Warnings ({}):", warnings.len());
207        for issue in &warnings {
208            eprintln!("   {} - {}", issue.table_name, issue.message);
209        }
210        eprintln!();
211    }
212
213    if errors.is_empty() {
214        eprintln!("✅ Validation passed");
215        eprintln!("   {validated_count} table(s) validated successfully");
216        if !warnings.is_empty() {
217            eprintln!("   {} warning(s)", warnings.len());
218        }
219        Ok(())
220    } else {
221        Err(anyhow::anyhow!("Validation failed with {} error(s)", errors.len()))
222    }
223}
224
225/// Compare declared metadata with actual database metadata
226fn compare_metadata(
227    table_name: &str,
228    declared: &serde_json::Value,
229    actual: &FactTableMetadata,
230) -> Vec<ValidationIssue> {
231    let mut issues = Vec::new();
232
233    // Extract declared measures
234    if let Some(declared_measures) = declared.get("measures").and_then(|m| m.as_array()) {
235        let declared_measure_names: HashSet<String> = declared_measures
236            .iter()
237            .filter_map(|m| m.get("name").and_then(|n| n.as_str()))
238            .map(String::from)
239            .collect();
240
241        let actual_measure_names: HashSet<String> =
242            actual.measures.iter().map(|m| m.name.clone()).collect();
243
244        // Check for missing measures in actual
245        for name in &declared_measure_names {
246            if !actual_measure_names.contains(name) {
247                issues.push(ValidationIssue::error(
248                    table_name.to_string(),
249                    format!("Declared measure '{name}' not found in database"),
250                ));
251            }
252        }
253
254        // Check for extra measures in actual (warning)
255        for name in &actual_measure_names {
256            if !declared_measure_names.contains(name) {
257                issues.push(ValidationIssue::warning(
258                    table_name.to_string(),
259                    format!("Database has measure '{name}' not declared in schema"),
260                ));
261            }
262        }
263
264        // Validate measure types
265        for declared_measure in declared_measures {
266            if let (Some(name), Some(declared_type)) = (
267                declared_measure.get("name").and_then(|n| n.as_str()),
268                declared_measure.get("sql_type").and_then(|t| t.as_str()),
269            ) {
270                if let Some(actual_measure) = actual.measures.iter().find(|m| m.name == name) {
271                    let actual_type_str = format!("{:?}", actual_measure.sql_type);
272                    if !types_compatible(declared_type, &actual_type_str) {
273                        issues.push(ValidationIssue::warning(
274                            table_name.to_string(),
275                            format!(
276                                "Measure '{name}' type mismatch: declared '{declared_type}', actual '{actual_type_str}'"
277                            ),
278                        ));
279                    }
280                }
281            }
282        }
283    }
284
285    // Validate dimensions column
286    if let Some(declared_dims) = declared.get("dimensions") {
287        if let Some(declared_name) = declared_dims.get("name").and_then(|n| n.as_str()) {
288            if declared_name != actual.dimensions.name {
289                issues.push(ValidationIssue::error(
290                    table_name.to_string(),
291                    format!(
292                        "Dimensions column mismatch: declared '{}', actual '{}'",
293                        declared_name, actual.dimensions.name
294                    ),
295                ));
296            }
297        }
298    }
299
300    // Validate denormalized filters
301    if let Some(declared_filters) = declared.get("denormalized_filters").and_then(|f| f.as_array())
302    {
303        let declared_filter_names: HashSet<String> = declared_filters
304            .iter()
305            .filter_map(|f| f.get("name").and_then(|n| n.as_str()))
306            .map(String::from)
307            .collect();
308
309        let actual_filter_names: HashSet<String> =
310            actual.denormalized_filters.iter().map(|f| f.name.clone()).collect();
311
312        for name in &declared_filter_names {
313            if !actual_filter_names.contains(name) {
314                issues.push(ValidationIssue::warning(
315                    table_name.to_string(),
316                    format!("Declared filter '{name}' not found in database"),
317                ));
318            }
319        }
320    }
321
322    issues
323}
324
325/// Check if two SQL types are compatible
326fn types_compatible(declared: &str, actual: &str) -> bool {
327    let declared_lower = declared.to_lowercase();
328    let actual_lower = actual.to_lowercase();
329
330    // Exact match
331    if declared_lower == actual_lower {
332        return true;
333    }
334
335    // Common aliases
336    let aliases: &[(&[&str], &[&str])] = &[
337        (&["int", "integer", "int4"], &["int", "integer", "int4"]),
338        (&["bigint", "int8"], &["bigint", "int8"]),
339        (&["decimal", "numeric", "money"], &["decimal", "numeric", "money"]),
340        (&["float", "double", "real", "float8"], &["float", "double", "real", "float8"]),
341        (&["text", "varchar", "string"], &["text", "varchar", "string"]),
342        (&["uuid"], &["uuid"]),
343        (
344            &["timestamp", "timestamptz", "datetime"],
345            &["timestamp", "timestamptz", "datetime"],
346        ),
347        (&["json", "jsonb"], &["json", "jsonb"]),
348        (&["bool", "boolean"], &["bool", "boolean"]),
349    ];
350
351    for (group1, group2) in aliases {
352        let in_group1 = group1.iter().any(|t| declared_lower.contains(t));
353        let in_group2 = group2.iter().any(|t| actual_lower.contains(t));
354        if in_group1 && in_group2 {
355            return true;
356        }
357    }
358
359    false
360}
361
362/// Validate metadata structure (basic validation)
363#[allow(dead_code)]
364pub fn validate_metadata_match(
365    declared: &serde_json::Value,
366    _actual_metadata: &serde_json::Value,
367) -> std::result::Result<(), String> {
368    let obj = declared.as_object().ok_or_else(|| "Metadata must be an object".to_string())?;
369
370    // Check required fields exist
371    if !obj.contains_key("measures") {
372        return Err("Missing 'measures' field".to_string());
373    }
374
375    if !obj.contains_key("dimensions") {
376        return Err("Missing 'dimensions' field".to_string());
377    }
378
379    Ok(())
380}
381
382#[cfg(test)]
383mod tests {
384    use fraiseql_core::compiler::fact_table::{
385        DimensionColumn, FilterColumn, MeasureColumn, SqlType,
386    };
387
388    use super::*;
389
390    #[test]
391    fn test_validation_issue_error() {
392        let issue = ValidationIssue::error("tf_sales".to_string(), "Table not found".to_string());
393        assert_eq!(issue.severity, IssueSeverity::Error);
394        assert_eq!(issue.table_name, "tf_sales");
395    }
396
397    #[test]
398    fn test_validation_issue_warning() {
399        let issue = ValidationIssue::warning(
400            "tf_orders".to_string(),
401            "Table exists but not declared".to_string(),
402        );
403        assert_eq!(issue.severity, IssueSeverity::Warning);
404    }
405
406    #[test]
407    fn test_validate_metadata_match() {
408        let metadata = serde_json::json!({
409            "measures": [],
410            "dimensions": {"name": "data"}
411        });
412
413        let result = validate_metadata_match(&metadata, &metadata);
414        assert!(result.is_ok());
415    }
416
417    #[test]
418    fn test_validate_metadata_match_missing_measures() {
419        let metadata = serde_json::json!({
420            "dimensions": {"name": "data"}
421        });
422
423        let result = validate_metadata_match(&metadata, &metadata);
424        assert!(result.is_err());
425    }
426
427    #[test]
428    fn test_types_compatible() {
429        // Exact match
430        assert!(types_compatible("Int", "Int"));
431        assert!(types_compatible("Decimal", "Decimal"));
432
433        // Aliases
434        assert!(types_compatible("integer", "Int"));
435        assert!(types_compatible("int4", "Int"));
436        assert!(types_compatible("bigint", "BigInt"));
437        assert!(types_compatible("numeric", "Decimal"));
438        assert!(types_compatible("float", "Float"));
439        assert!(types_compatible("double", "Float"));
440        assert!(types_compatible("text", "Text"));
441        assert!(types_compatible("varchar", "Text"));
442
443        // Incompatible
444        assert!(!types_compatible("Int", "Text"));
445        assert!(!types_compatible("Decimal", "Boolean"));
446    }
447
448    #[test]
449    fn test_compare_metadata_matching() {
450        let declared = serde_json::json!({
451            "measures": [
452                {"name": "revenue", "sql_type": "Decimal"},
453                {"name": "quantity", "sql_type": "Int"}
454            ],
455            "dimensions": {"name": "data"},
456            "denormalized_filters": [
457                {"name": "customer_id"}
458            ]
459        });
460
461        let actual = FactTableMetadata {
462            table_name:           "tf_sales".to_string(),
463            measures:             vec![
464                MeasureColumn {
465                    name:     "revenue".to_string(),
466                    sql_type: SqlType::Decimal,
467                    nullable: false,
468                },
469                MeasureColumn {
470                    name:     "quantity".to_string(),
471                    sql_type: SqlType::Int,
472                    nullable: false,
473                },
474            ],
475            dimensions:           DimensionColumn {
476                name:  "data".to_string(),
477                paths: vec![],
478            },
479            denormalized_filters: vec![FilterColumn {
480                name:     "customer_id".to_string(),
481                sql_type: SqlType::Uuid,
482                indexed:  true,
483            }],
484            calendar_dimensions:  vec![],
485        };
486
487        let issues = compare_metadata("tf_sales", &declared, &actual);
488
489        // No errors expected for matching metadata
490        let errors: Vec<_> = issues.iter().filter(|i| i.severity == IssueSeverity::Error).collect();
491        assert!(errors.is_empty(), "Unexpected errors: {errors:?}");
492    }
493
494    #[test]
495    fn test_compare_metadata_missing_measure() {
496        let declared = serde_json::json!({
497            "measures": [
498                {"name": "revenue", "sql_type": "Decimal"},
499                {"name": "profit", "sql_type": "Decimal"}  // Not in actual
500            ],
501            "dimensions": {"name": "data"}
502        });
503
504        let actual = FactTableMetadata {
505            table_name:           "tf_sales".to_string(),
506            measures:             vec![MeasureColumn {
507                name:     "revenue".to_string(),
508                sql_type: SqlType::Decimal,
509                nullable: false,
510            }],
511            dimensions:           DimensionColumn {
512                name:  "data".to_string(),
513                paths: vec![],
514            },
515            denormalized_filters: vec![],
516            calendar_dimensions:  vec![],
517        };
518
519        let issues = compare_metadata("tf_sales", &declared, &actual);
520
521        // Should have error for missing 'profit' measure
522        let errors: Vec<_> = issues.iter().filter(|i| i.severity == IssueSeverity::Error).collect();
523        assert_eq!(errors.len(), 1);
524        assert!(errors[0].message.contains("profit"));
525    }
526}