Skip to main content

fraiseql_cli/schema/
database_validator.rs

1//! Compile-time database validation for schema definitions.
2//!
3//! Validates a compiled schema against a live database at three levels:
4//! - **L1**: `sql_source` relation exists in the database
5//! - **L2**: Columns and JSON column types match
6//! - **L3**: JSONB keys exist in sampled rows (best-effort)
7//!
8//! All diagnostics are warnings — compilation never fails due to validation.
9
10use std::{
11    collections::{HashMap, HashSet},
12    fmt,
13};
14
15use fraiseql_core::{
16    db::{
17        DatabaseType,
18        introspector::{DatabaseIntrospector, RelationInfo},
19    },
20    schema::CompiledSchema,
21};
22
23/// Report containing all database validation warnings and discovered metadata.
24pub struct DatabaseValidationReport {
25    /// All warnings emitted during validation.
26    pub warnings:       Vec<DatabaseWarning>,
27    /// Native columns discovered per query during L2 validation.
28    ///
29    /// Key: query name. Value: map of argument name → PostgreSQL type string
30    /// (e.g. `"uuid"`, `"integer"`, `"text"`).
31    ///
32    /// Only contains entries for queries that have at least one direct argument
33    /// with a matching native column on their `sql_source`.
34    pub native_columns: HashMap<String, HashMap<String, String>>,
35}
36
37/// A single database validation warning.
38#[derive(Debug)]
39pub enum DatabaseWarning {
40    /// L1: `sql_source` relation does not exist.
41    MissingRelation {
42        /// Name of the query or mutation.
43        query_name: String,
44        /// The `sql_source` value that was not found.
45        sql_source: String,
46    },
47    /// L1: `additional_view` does not exist.
48    MissingAdditionalView {
49        /// Name of the query.
50        query_name: String,
51        /// The view name that was not found.
52        view_name:  String,
53    },
54    /// L2: `jsonb_column` does not exist on the relation.
55    MissingJsonColumn {
56        /// Name of the query.
57        query_name:  String,
58        /// The `sql_source` relation.
59        sql_source:  String,
60        /// The missing column name.
61        column_name: String,
62    },
63    /// L2: `jsonb_column` exists but is not a JSON/JSONB type.
64    WrongJsonColumnType {
65        /// Name of the query.
66        query_name:  String,
67        /// The `sql_source` relation.
68        sql_source:  String,
69        /// The column name.
70        column_name: String,
71        /// The actual SQL data type.
72        actual_type: String,
73    },
74    /// L2: `relay_cursor_column` does not exist on the relation.
75    MissingCursorColumn {
76        /// Name of the query.
77        query_name:  String,
78        /// The `sql_source` relation.
79        sql_source:  String,
80        /// The missing cursor column name.
81        column_name: String,
82    },
83    /// L3: a JSON key path is declared but not found in sampled data.
84    MissingJsonKey {
85        /// Name of the query.
86        query_name:  String,
87        /// The `sql_source` relation.
88        sql_source:  String,
89        /// The JSON column being sampled.
90        json_column: String,
91        /// The GraphQL field name.
92        field_name:  String,
93        /// The snake_case key looked up in the JSON.
94        json_key:    String,
95    },
96    /// L2: a direct query argument has no matching native column — will fall back to JSONB
97    /// extraction.
98    ///
99    /// For best performance, consider adding a native column with the same name
100    /// and an index on the `sql_source` table/view.
101    NativeColumnFallback {
102        /// Name of the query.
103        query_name: String,
104        /// The `sql_source` relation.
105        sql_source: String,
106        /// The argument name that has no matching native column.
107        arg_name:   String,
108    },
109}
110
111impl fmt::Display for DatabaseWarning {
112    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
113        match self {
114            Self::MissingRelation {
115                query_name,
116                sql_source,
117            } => {
118                write!(
119                    f,
120                    "query `{query_name}`: sql_source `{sql_source}` does not exist in database"
121                )
122            },
123            Self::MissingAdditionalView {
124                query_name,
125                view_name,
126            } => {
127                write!(
128                    f,
129                    "query `{query_name}`: additional_view `{view_name}` does not exist in database"
130                )
131            },
132            Self::MissingJsonColumn {
133                query_name,
134                sql_source,
135                column_name,
136            } => {
137                write!(
138                    f,
139                    "query `{query_name}`: column `{column_name}` not found on `{sql_source}`"
140                )
141            },
142            Self::WrongJsonColumnType {
143                query_name,
144                sql_source,
145                column_name,
146                actual_type,
147            } => {
148                write!(
149                    f,
150                    "query `{query_name}`: column `{column_name}` on `{sql_source}` is `{actual_type}`, expected json/jsonb"
151                )
152            },
153            Self::MissingCursorColumn {
154                query_name,
155                sql_source,
156                column_name,
157            } => {
158                write!(
159                    f,
160                    "query `{query_name}`: relay cursor column `{column_name}` not found on `{sql_source}`"
161                )
162            },
163            Self::MissingJsonKey {
164                query_name,
165                sql_source,
166                json_column,
167                field_name,
168                json_key,
169            } => {
170                write!(
171                    f,
172                    "query `{query_name}`: field `{field_name}` (key `{json_key}`) not found in `{sql_source}.{json_column}` sample data"
173                )
174            },
175            Self::NativeColumnFallback {
176                query_name,
177                sql_source,
178                arg_name,
179            } => {
180                write!(
181                    f,
182                    "query `{query_name}`: argument `{arg_name}` will use JSONB extraction \
183                     (`{sql_source}.data->>''{arg_name}''`) — no native column `{arg_name}` found on \
184                     `{sql_source}`. Add a native column with an index for O(log n) lookup."
185                )
186            },
187        }
188    }
189}
190
191/// Check if a SQL data type represents a JSON column for the given database.
192pub(crate) fn is_json_type(data_type: &str, db_type: DatabaseType) -> bool {
193    let lower = data_type.to_lowercase();
194    match db_type {
195        DatabaseType::PostgreSQL => lower == "jsonb" || lower == "json",
196        DatabaseType::MySQL => lower == "json",
197        DatabaseType::SQLite => lower.contains("json"),
198        // SQL Server has no native JSON type — always attempt JSON
199        // validation for the configured jsonb_column
200        DatabaseType::SQLServer => true,
201    }
202}
203
204/// Split a potentially schema-qualified name into (optional_schema, name).
205fn split_schema_qualified(sql_source: &str) -> (Option<&str>, &str) {
206    match sql_source.split_once('.') {
207        Some((schema, table)) => (Some(schema), table),
208        None => (None, sql_source),
209    }
210}
211
212/// Check if a relation exists in the relation lookup maps.
213fn relation_exists(
214    schema_qualified: &HashMap<(String, String), RelationInfo>,
215    unqualified: &HashMap<String, Vec<String>>,
216    sql_source: &str,
217) -> bool {
218    let (schema, name) = split_schema_qualified(sql_source);
219    if let Some(s) = schema {
220        schema_qualified.contains_key(&(s.to_string(), name.to_string()))
221    } else {
222        unqualified.contains_key(name)
223    }
224}
225
226/// Convert a `camelCase` or `PascalCase` field name to `snake_case`.
227///
228/// This matches the convention used by FraiseQL for JSONB key extraction.
229pub(crate) fn to_snake_case(name: &str) -> String {
230    let mut result = String::with_capacity(name.len() + 4);
231    for (i, ch) in name.chars().enumerate() {
232        if ch.is_uppercase() {
233            if i > 0 {
234                result.push('_');
235            }
236            result.push(ch.to_lowercase().next().unwrap_or(ch));
237        } else {
238            result.push(ch);
239        }
240    }
241    result
242}
243
244/// Validate a compiled schema against a live database.
245///
246/// Performs three levels of validation:
247/// - **L1**: Checks that `sql_source` relations exist
248/// - **L2**: Checks column existence and JSON column types
249/// - **L3**: Checks JSONB key existence via sampling
250///
251/// All diagnostics are warnings — the report never causes compilation to fail.
252///
253/// # Errors
254///
255/// Returns `FraiseQLError` if database introspection queries fail.
256pub async fn validate_schema_against_database(
257    schema: &CompiledSchema,
258    introspector: &impl DatabaseIntrospector,
259) -> fraiseql_core::Result<DatabaseValidationReport> {
260    // Auto-wired argument names excluded from direct-arg native column detection.
261    // Must stay in sync with AUTO_PARAM_NAMES in fraiseql-core/runtime/executor/query.rs.
262    const AUTO_PARAM_NAMES: &[&str] = &[
263        "where", "limit", "offset", "orderBy", "first", "last", "after", "before",
264    ];
265
266    let mut warnings = Vec::new();
267    let mut native_columns: HashMap<String, HashMap<String, String>> = HashMap::new();
268    let db_type = introspector.database_type();
269
270    // L1: Build relation lookup maps
271    let relations = introspector.list_relations().await?;
272    let (schema_qualified, unqualified) = build_relation_maps(&relations);
273
274    // Validate queries
275    for query in &schema.queries {
276        if let Some(ref source) = query.sql_source {
277            // L1: Check relation exists
278            if !relation_exists(&schema_qualified, &unqualified, source) {
279                warnings.push(DatabaseWarning::MissingRelation {
280                    query_name: query.name.clone(),
281                    sql_source: source.clone(),
282                });
283                continue; // Skip L2/L3 if relation doesn't exist
284            }
285
286            // L2: Get columns for the relation.
287            // Pass the full source (possibly schema-qualified like "benchmark.tv_post") so
288            // the introspector can use the explicit schema when present.
289            let columns = introspector.get_columns(source).await?;
290            let column_map: HashMap<String, String> =
291                columns.into_iter().map(|(name, dtype, _)| (name, dtype)).collect();
292
293            // L2: Check jsonb_column
294            let jsonb_col = &query.jsonb_column;
295            if !jsonb_col.is_empty() {
296                if let Some(actual_type) = column_map.get(jsonb_col) {
297                    if !is_json_type(actual_type, db_type) {
298                        warnings.push(DatabaseWarning::WrongJsonColumnType {
299                            query_name:  query.name.clone(),
300                            sql_source:  source.clone(),
301                            column_name: jsonb_col.clone(),
302                            actual_type: actual_type.clone(),
303                        });
304                    }
305                } else {
306                    warnings.push(DatabaseWarning::MissingJsonColumn {
307                        query_name:  query.name.clone(),
308                        sql_source:  source.clone(),
309                        column_name: jsonb_col.clone(),
310                    });
311                }
312            }
313
314            // L2: Check relay_cursor_column
315            if query.relay {
316                if let Some(ref cursor_col) = query.relay_cursor_column {
317                    if !column_map.contains_key(cursor_col) {
318                        warnings.push(DatabaseWarning::MissingCursorColumn {
319                            query_name:  query.name.clone(),
320                            sql_source:  source.clone(),
321                            column_name: cursor_col.clone(),
322                        });
323                    }
324                }
325            }
326
327            // L3: Sample JSON keys if jsonb_column is valid JSON type
328            if !jsonb_col.is_empty() {
329                let json_type_ok =
330                    column_map.get(jsonb_col).is_some_and(|t| is_json_type(t, db_type));
331
332                if json_type_ok {
333                    validate_json_keys(
334                        schema,
335                        query,
336                        source,
337                        jsonb_col,
338                        introspector,
339                        source, // pass full schema-qualified source for sample queries
340                        &mut warnings,
341                    )
342                    .await?;
343                }
344            }
345
346            // L2: Detect native columns for direct (non-auto-param) arguments.
347            let direct_args: Vec<&str> = query
348                .arguments
349                .iter()
350                .filter(|a| !AUTO_PARAM_NAMES.contains(&a.name.as_str()))
351                .map(|a| a.name.as_str())
352                .collect();
353
354            if !direct_args.is_empty() {
355                let mut query_native: HashMap<String, String> = HashMap::new();
356                for arg_name in &direct_args {
357                    if let Some(col_type) = column_map.get(*arg_name) {
358                        query_native.insert((*arg_name).to_string(), col_type.clone());
359                    } else {
360                        warnings.push(DatabaseWarning::NativeColumnFallback {
361                            query_name: query.name.clone(),
362                            sql_source: source.clone(),
363                            arg_name:   (*arg_name).to_string(),
364                        });
365                    }
366                }
367                if !query_native.is_empty() {
368                    native_columns.insert(query.name.clone(), query_native);
369                }
370            }
371
372            // L1: Check additional_views
373            for view in &query.additional_views {
374                if !relation_exists(&schema_qualified, &unqualified, view) {
375                    warnings.push(DatabaseWarning::MissingAdditionalView {
376                        query_name: query.name.clone(),
377                        view_name:  view.clone(),
378                    });
379                }
380            }
381        }
382    }
383
384    // Validate mutations (L1 only)
385    for mutation in &schema.mutations {
386        if let Some(ref source) = mutation.sql_source {
387            if !relation_exists(&schema_qualified, &unqualified, source) {
388                warnings.push(DatabaseWarning::MissingRelation {
389                    query_name: mutation.name.clone(),
390                    sql_source: source.clone(),
391                });
392            }
393        }
394    }
395
396    Ok(DatabaseValidationReport {
397        warnings,
398        native_columns,
399    })
400}
401
402/// Build lookup maps from the list of relations.
403fn build_relation_maps(
404    relations: &[RelationInfo],
405) -> (HashMap<(String, String), RelationInfo>, HashMap<String, Vec<String>>) {
406    let mut schema_qualified = HashMap::new();
407    let mut unqualified: HashMap<String, Vec<String>> = HashMap::new();
408
409    for rel in relations {
410        schema_qualified.insert((rel.schema.clone(), rel.name.clone()), rel.clone());
411        unqualified.entry(rel.name.clone()).or_default().push(rel.schema.clone());
412    }
413
414    (schema_qualified, unqualified)
415}
416
417/// Validate JSON keys in sampled data for L3 checking.
418async fn validate_json_keys(
419    schema: &CompiledSchema,
420    query: &fraiseql_core::schema::QueryDefinition,
421    source: &str,
422    jsonb_col: &str,
423    introspector: &impl DatabaseIntrospector,
424    table_name: &str,
425    warnings: &mut Vec<DatabaseWarning>,
426) -> fraiseql_core::Result<()> {
427    let samples = introspector.get_sample_json_rows(table_name, jsonb_col, 5).await?;
428
429    if samples.is_empty() {
430        return Ok(());
431    }
432
433    // Merge all top-level keys from sampled rows
434    let mut all_keys = HashSet::new();
435    for sample in &samples {
436        if let serde_json::Value::Object(map) = sample {
437            for key in map.keys() {
438                all_keys.insert(key.clone());
439            }
440        }
441    }
442
443    if all_keys.is_empty() {
444        return Ok(());
445    }
446
447    // Find the type definition for this query's return type
448    let type_def = schema.types.iter().find(|t| t.name.as_str() == query.return_type);
449
450    if let Some(type_def) = type_def {
451        for field in &type_def.fields {
452            let field_str = field.name.as_str();
453            let json_key = to_snake_case(field_str);
454            // Skip fields that are top-level columns (not from JSONB)
455            // Convention: fields like "id", "pk_*", "fk_*" are columns, not JSON keys
456            if field_str == "id" || field_str.starts_with("pk_") || field_str.starts_with("fk_") {
457                continue;
458            }
459            if !all_keys.contains(&json_key) && !all_keys.contains(field_str) {
460                warnings.push(DatabaseWarning::MissingJsonKey {
461                    query_name: query.name.clone(),
462                    sql_source: source.to_string(),
463                    json_column: jsonb_col.to_string(),
464                    field_name: field_str.to_string(),
465                    json_key,
466                });
467            }
468        }
469    }
470
471    Ok(())
472}
473
474/// Enum dispatch for database introspectors.
475///
476/// Uses enum dispatch instead of `Box<dyn DatabaseIntrospector>` because the
477/// trait uses `async_fn_in_trait` and cannot be object-safe.
478pub enum AnyIntrospector {
479    /// PostgreSQL introspector.
480    Postgres(fraiseql_core::db::PostgresIntrospector),
481    #[cfg(feature = "mysql")]
482    /// MySQL introspector.
483    MySql(fraiseql_core::db::MySqlIntrospector),
484    #[cfg(feature = "sqlite")]
485    /// SQLite introspector.
486    Sqlite(fraiseql_core::db::SqliteIntrospector),
487    #[cfg(feature = "sqlserver")]
488    /// SQL Server introspector.
489    SqlServer(fraiseql_core::db::SqlServerIntrospector),
490}
491
492impl DatabaseIntrospector for AnyIntrospector {
493    async fn list_fact_tables(&self) -> fraiseql_core::Result<Vec<String>> {
494        match self {
495            Self::Postgres(i) => i.list_fact_tables().await,
496            #[cfg(feature = "mysql")]
497            Self::MySql(i) => i.list_fact_tables().await,
498            #[cfg(feature = "sqlite")]
499            Self::Sqlite(i) => i.list_fact_tables().await,
500            #[cfg(feature = "sqlserver")]
501            Self::SqlServer(i) => i.list_fact_tables().await,
502        }
503    }
504
505    async fn get_columns(
506        &self,
507        table_name: &str,
508    ) -> fraiseql_core::Result<Vec<(String, String, bool)>> {
509        match self {
510            Self::Postgres(i) => i.get_columns(table_name).await,
511            #[cfg(feature = "mysql")]
512            Self::MySql(i) => i.get_columns(table_name).await,
513            #[cfg(feature = "sqlite")]
514            Self::Sqlite(i) => i.get_columns(table_name).await,
515            #[cfg(feature = "sqlserver")]
516            Self::SqlServer(i) => i.get_columns(table_name).await,
517        }
518    }
519
520    async fn get_indexed_columns(&self, table_name: &str) -> fraiseql_core::Result<Vec<String>> {
521        match self {
522            Self::Postgres(i) => i.get_indexed_columns(table_name).await,
523            #[cfg(feature = "mysql")]
524            Self::MySql(i) => i.get_indexed_columns(table_name).await,
525            #[cfg(feature = "sqlite")]
526            Self::Sqlite(i) => i.get_indexed_columns(table_name).await,
527            #[cfg(feature = "sqlserver")]
528            Self::SqlServer(i) => i.get_indexed_columns(table_name).await,
529        }
530    }
531
532    fn database_type(&self) -> DatabaseType {
533        match self {
534            Self::Postgres(i) => i.database_type(),
535            #[cfg(feature = "mysql")]
536            Self::MySql(i) => i.database_type(),
537            #[cfg(feature = "sqlite")]
538            Self::Sqlite(i) => i.database_type(),
539            #[cfg(feature = "sqlserver")]
540            Self::SqlServer(i) => i.database_type(),
541        }
542    }
543
544    async fn get_sample_jsonb(
545        &self,
546        table_name: &str,
547        column_name: &str,
548    ) -> fraiseql_core::Result<Option<serde_json::Value>> {
549        match self {
550            Self::Postgres(i) => i.get_sample_jsonb(table_name, column_name).await,
551            #[cfg(feature = "mysql")]
552            Self::MySql(i) => i.get_sample_jsonb(table_name, column_name).await,
553            #[cfg(feature = "sqlite")]
554            Self::Sqlite(i) => i.get_sample_jsonb(table_name, column_name).await,
555            #[cfg(feature = "sqlserver")]
556            Self::SqlServer(i) => i.get_sample_jsonb(table_name, column_name).await,
557        }
558    }
559
560    async fn list_relations(&self) -> fraiseql_core::Result<Vec<fraiseql_core::db::RelationInfo>> {
561        match self {
562            Self::Postgres(i) => i.list_relations().await,
563            #[cfg(feature = "mysql")]
564            Self::MySql(i) => i.list_relations().await,
565            #[cfg(feature = "sqlite")]
566            Self::Sqlite(i) => i.list_relations().await,
567            #[cfg(feature = "sqlserver")]
568            Self::SqlServer(i) => i.list_relations().await,
569        }
570    }
571
572    async fn get_sample_json_rows(
573        &self,
574        table_name: &str,
575        column_name: &str,
576        limit: usize,
577    ) -> fraiseql_core::Result<Vec<serde_json::Value>> {
578        match self {
579            Self::Postgres(i) => i.get_sample_json_rows(table_name, column_name, limit).await,
580            #[cfg(feature = "mysql")]
581            Self::MySql(i) => i.get_sample_json_rows(table_name, column_name, limit).await,
582            #[cfg(feature = "sqlite")]
583            Self::Sqlite(i) => i.get_sample_json_rows(table_name, column_name, limit).await,
584            #[cfg(feature = "sqlserver")]
585            Self::SqlServer(i) => i.get_sample_json_rows(table_name, column_name, limit).await,
586        }
587    }
588}
589
590/// Create an introspector from a database URL.
591///
592/// Detects the database type from the URL scheme and creates the appropriate
593/// introspector with a connection pool.
594///
595/// # Errors
596///
597/// Returns error if the URL scheme is unrecognized or the connection pool
598/// cannot be created.
599#[allow(clippy::unused_async)] // Reason: callers always .await this; feature-gated branches do use await
600pub async fn create_introspector(db_url: &str) -> anyhow::Result<AnyIntrospector> {
601    if db_url.starts_with("postgres") {
602        use deadpool_postgres::{Config, ManagerConfig, RecyclingMethod, Runtime};
603        use tokio_postgres::NoTls;
604
605        let mut cfg = Config::new();
606        cfg.url = Some(db_url.to_string());
607        cfg.manager = Some(ManagerConfig {
608            recycling_method: RecyclingMethod::Fast,
609        });
610        cfg.pool = Some(deadpool_postgres::PoolConfig::new(2));
611
612        let pool = cfg
613            .create_pool(Some(Runtime::Tokio1), NoTls)
614            .map_err(|e| anyhow::anyhow!("Failed to create PostgreSQL pool: {e}"))?;
615
616        Ok(AnyIntrospector::Postgres(fraiseql_core::db::PostgresIntrospector::new(pool)))
617    } else if db_url.starts_with("mysql") || db_url.starts_with("mariadb") {
618        #[cfg(feature = "mysql")]
619        {
620            use sqlx::mysql::MySqlPool;
621
622            let pool = MySqlPool::connect(db_url)
623                .await
624                .map_err(|e| anyhow::anyhow!("Failed to create MySQL pool: {e}"))?;
625
626            Ok(AnyIntrospector::MySql(fraiseql_core::db::MySqlIntrospector::new(pool)))
627        }
628        #[cfg(not(feature = "mysql"))]
629        {
630            anyhow::bail!("MySQL support not compiled in. Rebuild with `--features mysql`.")
631        }
632    } else if db_url.starts_with("sqlite")
633        || std::path::Path::new(db_url)
634            .extension()
635            .is_some_and(|ext| ext.eq_ignore_ascii_case("db") || ext.eq_ignore_ascii_case("sqlite"))
636    {
637        #[cfg(feature = "sqlite")]
638        {
639            use sqlx::sqlite::SqlitePool;
640
641            let pool = SqlitePool::connect(db_url)
642                .await
643                .map_err(|e| anyhow::anyhow!("Failed to create SQLite pool: {e}"))?;
644
645            Ok(AnyIntrospector::Sqlite(fraiseql_core::db::SqliteIntrospector::new(pool)))
646        }
647        #[cfg(not(feature = "sqlite"))]
648        {
649            anyhow::bail!("SQLite support not compiled in. Rebuild with `--features sqlite`.")
650        }
651    } else if db_url.starts_with("mssql") || db_url.starts_with("server=") {
652        #[cfg(feature = "sqlserver")]
653        {
654            use bb8::Pool;
655            use bb8_tiberius::ConnectionManager;
656            use tiberius::Config;
657
658            let config = Config::from_ado_string(db_url).map_err(|e| {
659                anyhow::anyhow!("Failed to parse SQL Server connection string: {e}")
660            })?;
661            let mgr = ConnectionManager::build(config).map_err(|e| {
662                anyhow::anyhow!("Failed to build SQL Server connection manager: {e}")
663            })?;
664            let pool = Pool::builder()
665                .max_size(2)
666                .build(mgr)
667                .await
668                .map_err(|e| anyhow::anyhow!("Failed to create SQL Server pool: {e}"))?;
669
670            Ok(AnyIntrospector::SqlServer(fraiseql_core::db::SqlServerIntrospector::new(pool)))
671        }
672        #[cfg(not(feature = "sqlserver"))]
673        {
674            anyhow::bail!(
675                "SQL Server support not compiled in. Rebuild with `--features sqlserver`."
676            )
677        }
678    } else {
679        anyhow::bail!("Unrecognized database URL scheme: {db_url}")
680    }
681}