umbral_core/
inspect.rs

1//! `inspectdb` — introspect an existing database into umbral models.
2//!
3//! The porting payoff. A team with an existing
4//! SQLite database points `inspectdb` at it and gets a `models.rs`
5//! with `#[derive(Model)]` structs plus a `0001_initial.json`
6//! migration carrying one `CreateTable` op per table. The migration
7//! is recorded as applied in `umbral_migrations` so the next `migrate`
8//! is a no-op until the user actually changes a model.
9//!
10//! After that, the introspected schema enters the M5 declare →
11//! migrate → change → migrate loop with no separate code path.
12//!
13//! ## Backend coverage
14//!
15//! - **SQLite (M6 v1).** [`introspect_pool`] reads `sqlite_master` for
16//!   table names and `PRAGMA table_info` for column descriptors.
17//! - **Postgres (Phase 3 of the rollout).** [`introspect_pool_pg`]
18//!   reads `information_schema.tables` / `information_schema.columns`
19//!   and joins `information_schema.table_constraints` + `key_column_usage`
20//!   for primary keys. Same `IntrospectedSchema` output; the
21//!   downstream pipeline (`render_models` / `render_initial_migration`
22//!   / `write_outputs`) is backend-agnostic.
23//!
24//! ## M6 v1 scope
25//!
26//! - **Output.** A flat `models.rs` plus `migrations/0001_initial.json`
27//!   in the user-chosen output directory. No `Cargo.toml`, no `lib.rs`
28//!   with a `Plugin` impl: the plugin trait isn't shipped until M7,
29//!   so M6 v1 leaves the wiring (one `mod models;` plus one
30//!   `.model::<T>()` per generated struct) to the user. M7 turns the
31//!   output into a self-contained plugin crate.
//! - **Type mapping.** Covers the M5 [`SqlType`] catalogue
//!   (integers, floats, bool, text, date / time / timestamptz, uuid,
//!   JSON and binary columns on both backends; on Postgres also
//!   arrays of the scalar element types, network address types and
//!   a few text-backed types) plus their nullable variants. Anything
//!   the backend's mapping table does not recognise (custom enum
//!   types, for example) returns
//!   [`InspectError::UnsupportedColumnType`] with the table / column
//!   names; the user fixes it by hand or waits for the field-type
//!   catalogue to grow.
39//! - **FKs and indexes.** Not yet read out. The CreateTable op carries
40//!   columns only; FK / index detection lands with the field-level
41//!   support in [`crate::orm`].
42//!
43//! See [`docs/specs/07-inspectdb.md`] for the eventual target shape
44//! and the deferred items.
45//!
46//! [`DatabaseBackend`]: crate::backend::DatabaseBackend
47//! [`SqlType`]: crate::orm::SqlType
48
49use std::path::{Path, PathBuf};
50
51use sqlx::{PgPool, Row, SqlitePool};
52use umbral_casing::{pascal_case_from_table, to_snake_case};
53
54use crate::migrate::{self, Column, MigrationFile, ModelMeta, Operation, Snapshot};
55use crate::orm::SqlType;
56
57/// Default plugin name the generated migration is filed under. Matches
58/// [`crate::migrate::APP_PLUGIN_NAME`] so the produced
59/// `0001_initial.json` lands inside the same `migrations/app/`
60/// directory the M5 engine reads from. M7 lifts this once the user can
61/// choose a real plugin name via `--plugin`.
62pub const INSPECTED_PLUGIN_NAME: &str = migrate::APP_PLUGIN_NAME;
63
64/// Default filename for the introspected initial migration.
65pub const INITIAL_MIGRATION_ID: &str = "0001_initial";
66
67/// The introspection result. A flat list of tables, each with its
68/// columns in declaration order. Indexes and foreign keys are omitted
69/// at M6 v1 (the field types they target don't exist yet).
70#[derive(Debug, Clone, PartialEq, Eq)]
71pub struct IntrospectedSchema {
72    pub tables: Vec<IntrospectedTable>,
73}
74
75/// One introspected table.
76#[derive(Debug, Clone, PartialEq, Eq)]
77pub struct IntrospectedTable {
78    /// The SQL table name as it appears in the database.
79    pub table: String,
80    /// The struct name the renderer will use. Defaults to the table
81    /// name in UpperCamelCase; the M6 v1 importer does not strip
82    /// prefixes (deferred to M7's `--strip-prefix` flag).
83    pub name: String,
84    /// One descriptor per column, in declaration order.
85    pub columns: Vec<IntrospectedColumn>,
86}
87
88/// One introspected column.
89#[derive(Debug, Clone, PartialEq, Eq)]
90pub struct IntrospectedColumn {
91    pub name: String,
92    pub ty: SqlType,
93    pub primary_key: bool,
94    pub nullable: bool,
95}
96
97/// Errors `inspectdb` can produce. Carries enough detail for the CLI
98/// to print a single-line diagnostic with the offending table and
99/// column.
100#[derive(Debug)]
101pub enum InspectError {
102    /// IO error reading or writing a generated file.
103    Io(std::io::Error),
104    /// JSON serialisation error pretty-printing the generated migration.
105    Json(serde_json::Error),
106    /// sqlx error executing the introspection queries.
107    Sqlx(sqlx::Error),
108    /// The introspection ran but found no tables. Surfaced so the CLI
109    /// can print "nothing to import" instead of writing empty files.
110    NoTables,
111    /// A column's SQL type isn't in the M6 v1 mapping table. Holds the
112    /// table / column / raw SQL type so the user can decide whether to
113    /// add a field type, edit the generated code, or wait for the
114    /// catalogue to grow.
115    UnsupportedColumnType {
116        table: String,
117        column: String,
118        sql_type: String,
119    },
120    /// Pass-through for migration-engine failures (e.g. recording the
121    /// initial migration as applied).
122    Migrate(migrate::MigrateError),
123}
124
125impl std::fmt::Display for InspectError {
126    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
127        match self {
128            InspectError::Io(e) => write!(f, "umbral inspectdb: io: {e}"),
129            InspectError::Json(e) => write!(f, "umbral inspectdb: json: {e}"),
130            InspectError::Sqlx(e) => write!(f, "umbral inspectdb: sqlx: {e}"),
131            InspectError::NoTables => write!(
132                f,
133                "umbral inspectdb: no tables found in the database (nothing to import)"
134            ),
135            InspectError::UnsupportedColumnType {
136                table,
137                column,
138                sql_type,
139            } => write!(
140                f,
141                "umbral inspectdb: column `{table}.{column}` has unsupported SQL type `{sql_type}`; \
142                 add a matching SqlType variant or edit the generated model by hand"
143            ),
144            InspectError::Migrate(e) => write!(f, "umbral inspectdb: migrate: {e}"),
145        }
146    }
147}
148
149impl std::error::Error for InspectError {}
150
151impl From<std::io::Error> for InspectError {
152    fn from(e: std::io::Error) -> Self {
153        Self::Io(e)
154    }
155}
156
157impl From<sqlx::Error> for InspectError {
158    fn from(e: sqlx::Error) -> Self {
159        Self::Sqlx(e)
160    }
161}
162
163impl From<serde_json::Error> for InspectError {
164    fn from(e: serde_json::Error) -> Self {
165        Self::Json(e)
166    }
167}
168
169impl From<migrate::MigrateError> for InspectError {
170    fn from(e: migrate::MigrateError) -> Self {
171        Self::Migrate(e)
172    }
173}
174
/// Options for one `inspectdb` run. The CLI subcommand fills this in
/// from its flags and passes it to [`inspectdb`].
#[derive(Clone, Debug)]
pub struct InspectOptions {
    /// Root directory for the generated files. `models.rs` is written
    /// directly under it; the migration goes to
    /// `<output>/migrations/<INSPECTED_PLUGIN_NAME>/0001_initial.json`.
    pub output: PathBuf,
    /// When set, `0001_initial` is recorded as applied in
    /// `umbral_migrations` once it has been written. This is the right
    /// choice when the target database already contains the tables
    /// (running the migration would fail); leave it off for an empty
    /// database.
    pub mark_applied: bool,
}
189
/// What an `inspectdb` run produced, handed back to the CLI so it can
/// print a one-line "imported N tables / M columns" summary.
#[derive(Clone, Debug, Default)]
pub struct InspectReport {
    /// Table count for the summary line.
    pub tables: usize,
    /// Column count for the summary line.
    pub columns: usize,
    /// Location of the generated `models.rs`.
    pub models_path: PathBuf,
    /// Location of the generated initial migration.
    pub migration_path: PathBuf,
}
199
200// =========================================================================
201// Top-level entry points. Bodies filled in by the M6 fan-out subagents.
202// =========================================================================
203
204/// Run the full `inspectdb` pipeline against the ambient pool:
205/// introspect (dispatching on the active backend), render `models.rs`,
206/// render `0001_initial.json`, write both to `opts.output`, and
207/// optionally mark applied.
208///
209/// Phase 3 of the Postgres rollout taught this entry point to dispatch
210/// on `DbPool` — the SQLite path uses `PRAGMA table_info`; the
211/// Postgres path uses `information_schema`. The downstream pipeline
212/// (rendering + writing) is backend-agnostic and runs the same way.
213pub async fn inspectdb(opts: InspectOptions) -> Result<InspectReport, InspectError> {
214    let schema = match crate::db::pool_dispatched() {
215        crate::db::DbPool::Sqlite(pool) => introspect_pool(pool).await?,
216        crate::db::DbPool::Postgres(pool) => introspect_pool_pg(pool).await?,
217    };
218    if schema.tables.is_empty() {
219        return Err(InspectError::NoTables);
220    }
221
222    let models_src = render_models(&schema);
223    let migration = render_initial_migration(&schema);
224    let report = write_outputs(&opts.output, &models_src, &migration).await?;
225
226    if opts.mark_applied {
227        let hash = migration.snapshot_after.hash();
228        migrate::record_applied(&migration.plugin, &migration.id, &hash).await?;
229    }
230
231    Ok(report)
232}
233
234/// Introspect the schema reachable through the given SQLite pool.
235/// Reads `sqlite_master` for table names and `PRAGMA table_info(...)`
236/// for column descriptors. Skips internal tables (`sqlite_*`,
237/// `umbral_migrations`).
238pub async fn introspect_pool(pool: &SqlitePool) -> Result<IntrospectedSchema, InspectError> {
239    // List user tables in lexical name order. `sqlite_master` carries
240    // both tables and indexes; the `type = 'table'` predicate scopes the
241    // result to tables. The skip-list takes out SQLite's internal
242    // bookkeeping (`sqlite_%`) and umbral's own tracking table, which
243    // would otherwise loop back through the migration engine.
244    let table_rows = sqlx::query(
245        "SELECT name FROM sqlite_master \
246         WHERE type = 'table' \
247           AND name NOT LIKE 'sqlite_%' \
248           AND name <> 'umbral_migrations' \
249         ORDER BY name",
250    )
251    .fetch_all(pool)
252    .await?;
253
254    let mut tables: Vec<IntrospectedTable> = Vec::with_capacity(table_rows.len());
255    for row in table_rows {
256        let table: String = row.try_get("name")?;
257        let columns = introspect_columns(pool, &table).await?;
258        tables.push(IntrospectedTable {
259            name: pascal_case_from_table(&table),
260            table,
261            columns,
262        });
263    }
264
265    Ok(IntrospectedSchema { tables })
266}
267
268/// Introspect the schema reachable through the given Postgres pool.
269/// Reads `information_schema.tables` for table names,
270/// `information_schema.columns` for column descriptors, and joins
271/// `information_schema.table_constraints` + `key_column_usage` for
272/// the primary-key flag. Scoped to the `public` schema by default;
273/// internal Postgres schemas and umbral's own `umbral_migrations`
274/// tracking table are skipped.
275///
276/// The output is the same `IntrospectedSchema` the SQLite path
277/// produces — downstream rendering doesn't know which backend the
278/// data came from.
279pub async fn introspect_pool_pg(pool: &PgPool) -> Result<IntrospectedSchema, InspectError> {
280    // List user tables in the `public` schema, lexically. Postgres
281    // information_schema is standard SQL; pg_catalog is the lower-
282    // level surface but information_schema is portable across
283    // Postgres-compatible servers and carries everything the
284    // SqlType catalogue needs.
285    let table_rows: Vec<(String,)> = sqlx::query_as(
286        "SELECT table_name FROM information_schema.tables \
287         WHERE table_schema = 'public' \
288           AND table_type = 'BASE TABLE' \
289           AND table_name <> 'umbral_migrations' \
290         ORDER BY table_name",
291    )
292    .fetch_all(pool)
293    .await?;
294
295    let mut tables: Vec<IntrospectedTable> = Vec::with_capacity(table_rows.len());
296    for (table,) in table_rows {
297        let columns = introspect_columns_pg(pool, &table).await?;
298        tables.push(IntrospectedTable {
299            name: pascal_case_from_table(&table),
300            table,
301            columns,
302        });
303    }
304
305    Ok(IntrospectedSchema { tables })
306}
307
308/// Read one Postgres table's columns via `information_schema.columns`,
309/// plus a primary-key join over `information_schema.table_constraints`
310/// and `key_column_usage`. Columns come back in declaration order
311/// (`ordinal_position`).
312///
313/// `data_type` is the normalised type string Postgres exposes through
314/// information_schema (e.g. `"integer"`, `"character varying"`,
315/// `"timestamp with time zone"`); [`map_postgres_type`] maps it to the
316/// umbral `SqlType` catalogue. Anything unmapped surfaces as
317/// [`InspectError::UnsupportedColumnType`] with the table / column
318/// names and the raw type string.
319async fn introspect_columns_pg(
320    pool: &PgPool,
321    table: &str,
322) -> Result<Vec<IntrospectedColumn>, InspectError> {
323    // The primary-key lookup runs once per table. The set is typically
324    // tiny (one column for most tables, a handful for composite keys)
325    // so collecting it up-front into a Vec keeps the inner column loop
326    // O(columns × pk_columns) without an extra round trip per column.
327    let pk_rows: Vec<(String,)> = sqlx::query_as(
328        "SELECT kcu.column_name \
329         FROM information_schema.table_constraints tc \
330         JOIN information_schema.key_column_usage kcu \
331           ON tc.constraint_name = kcu.constraint_name \
332          AND tc.table_schema = kcu.table_schema \
333         WHERE tc.constraint_type = 'PRIMARY KEY' \
334           AND tc.table_schema = 'public' \
335           AND tc.table_name = $1",
336    )
337    .bind(table)
338    .fetch_all(pool)
339    .await?;
340    let pk_columns: std::collections::HashSet<String> = pk_rows.into_iter().map(|(c,)| c).collect();
341
342    // `udt_name` carries the underlying type name even when `data_type`
343    // is the abstract `"ARRAY"` placeholder. For `bigint[]` the
344    // information_schema reports data_type = "ARRAY" and udt_name =
345    // "_int8" (underscore prefix marks the array variant in pg_type).
346    // For non-array columns udt_name carries the same physical name
347    // (`int8`, `text`, etc.) but `data_type` is the canonical match
348    // key we already lookup against.
349    let column_rows: Vec<(String, String, String, String)> = sqlx::query_as(
350        "SELECT column_name, data_type, is_nullable, udt_name \
351         FROM information_schema.columns \
352         WHERE table_schema = 'public' AND table_name = $1 \
353         ORDER BY ordinal_position",
354    )
355    .bind(table)
356    .fetch_all(pool)
357    .await?;
358
359    let mut columns: Vec<IntrospectedColumn> = Vec::with_capacity(column_rows.len());
360    for (name, data_type, is_nullable, udt_name) in column_rows {
361        let ty = if data_type.eq_ignore_ascii_case("ARRAY") {
362            // Element type comes from udt_name with the leading
363            // underscore stripped. `_int8` -> int8 -> ArrayElement::BigInt.
364            let elem_name = udt_name.strip_prefix('_').unwrap_or(udt_name.as_str());
365            map_postgres_array_element(elem_name).ok_or_else(|| {
366                InspectError::UnsupportedColumnType {
367                    table: table.to_string(),
368                    column: name.clone(),
369                    sql_type: format!("ARRAY of {elem_name}"),
370                }
371            })?
372        } else {
373            map_postgres_type(&data_type).ok_or_else(|| InspectError::UnsupportedColumnType {
374                table: table.to_string(),
375                column: name.clone(),
376                sql_type: data_type.clone(),
377            })?
378        };
379        let primary_key = pk_columns.contains(&name);
380        // Postgres `is_nullable` is the string "YES" or "NO". A primary
381        // key is non-nullable by definition (the server enforces it);
382        // we force `nullable = false` so a SERIAL/BIGSERIAL PK round-
383        // trips through the M3 derive (which rejects `Option<T>` PKs)
384        // matching the behavioural fix already in place on the SQLite
385        // path.
386        let nullable = if primary_key {
387            false
388        } else {
389            is_nullable.eq_ignore_ascii_case("YES")
390        };
391        columns.push(IntrospectedColumn {
392            name,
393            ty,
394            primary_key,
395            nullable,
396        });
397    }
398
399    Ok(columns)
400}
401
402/// Map a Postgres array's element-type name (from `udt_name` with the
403/// leading underscore stripped) to a [`SqlType::Array`] variant.
404///
405/// The `udt_name` column on `information_schema.columns` carries the
406/// physical type name from `pg_catalog.pg_type`; array variants are
407/// prefixed with `_` (`_int8` for `bigint[]`, `_text` for `text[]`).
408/// The caller strips the prefix; this function maps the remaining
409/// stem to the umbral `ArrayElement` catalogue.
410///
411/// Returns `None` if the element type isn't in
412/// `umbral::orm::ArrayElement` — chrono types, JSON, network types,
413/// and Postgres-specific types like NUMERIC fall outside Phase 4.1's
414/// array catalogue.
415fn map_postgres_array_element(elem: &str) -> Option<SqlType> {
416    use crate::orm::ArrayElement;
417    let kind = match elem.trim().to_ascii_lowercase().as_str() {
418        // Postgres physical type names (per pg_type.typname). The
419        // information_schema strips spaces from the data_type alias
420        // form, so we match the canonical lowercase names here.
421        "int2" => ArrayElement::SmallInt,
422        "int4" => ArrayElement::Integer,
423        "int8" => ArrayElement::BigInt,
424        "float4" => ArrayElement::Real,
425        "float8" => ArrayElement::Double,
426        "bool" => ArrayElement::Boolean,
427        "text" | "varchar" | "bpchar" => ArrayElement::Text,
428        "uuid" => ArrayElement::Uuid,
429        _ => return None,
430    };
431    Some(SqlType::Array(kind))
432}
433
434/// Map a Postgres `information_schema.columns.data_type` value to the
435/// umbral `SqlType` catalogue. Postgres normalises the strings, so the
436/// match table is the canonical names rather than the optional aliases
437/// `pg_type.typname` would expose. The inverse of
438/// [`crate::backend::PostgresBackend::map_type`] — both stay in sync
439/// as new `SqlType` variants land.
440///
441/// Returns `None` on anything not in the catalogue (Postgres-specific
442/// types like `numeric`, `jsonb`, `bytea`, arrays, custom domains).
443/// The caller turns that into `UnsupportedColumnType` with enough
444/// context for the operator to fix by hand or wait for the field-
445/// type catalogue to grow.
446fn map_postgres_type(raw: &str) -> Option<SqlType> {
447    let normalised = raw.trim().to_ascii_lowercase();
448    match normalised.as_str() {
449        "smallint" => Some(SqlType::SmallInt),
450        "integer" => Some(SqlType::Integer),
451        "bigint" => Some(SqlType::BigInt),
452        "real" => Some(SqlType::Real),
453        "double precision" => Some(SqlType::Double),
454        "boolean" => Some(SqlType::Boolean),
455        // information_schema reports `text`, `character varying`, and
456        // `character` for VARCHAR / CHAR / TEXT. All round-trip through
457        // umbral's Text variant.
458        "text" | "character varying" | "character" => Some(SqlType::Text),
459        "date" => Some(SqlType::Date),
460        // Both timezone variants of TIME land on umbral's Time. The
461        // distinction is preserved in the database; the client-side
462        // type system doesn't model it yet.
463        "time without time zone" | "time with time zone" => Some(SqlType::Time),
464        // Likewise both timezone variants of TIMESTAMP land on
465        // Timestamptz. The umbral catalogue picks the with-tz variant
466        // as the default so chrono::DateTime<Utc> is the natural Rust
467        // type for either.
468        "timestamp without time zone" | "timestamp with time zone" => Some(SqlType::Timestamptz),
469        "uuid" => Some(SqlType::Uuid),
470        // Both `json` and `jsonb` round-trip to umbral's portable Json
471        // variant. The DDL renderer chose `jsonb` on the way out; if a
472        // pre-existing database stores values as `json` (the unindexed
473        // text variant), inspectdb still recognises it on the way in.
474        // A re-migrate would normalize to `jsonb` if the user re-creates
475        // the column, which matches the M5 declare-and-migrate loop.
476        "json" | "jsonb" => Some(SqlType::Json),
477        // Phase 4.4: Postgres network address types.
478        "inet" => Some(SqlType::Inet),
479        "cidr" => Some(SqlType::Cidr),
480        "macaddr" => Some(SqlType::MacAddr),
481        // gaps2 #70: text-backed Postgres types. `bit varying` and bare
482        // `bit` (the information_schema sometimes reports `bit` for a
483        // BIT(n)) both round-trip to the `Bit` variant.
484        "xml" => Some(SqlType::Xml),
485        "ltree" => Some(SqlType::Ltree),
486        "bit" | "bit varying" | "varbit" => Some(SqlType::Bit),
487        "tsvector" => Some(SqlType::FullText),
488        "bytea" => Some(SqlType::Bytes),
489        _ => None,
490    }
491}
492
493/// Read one table's columns via `PRAGMA table_info`. The PRAGMA returns
494/// `(cid, name, type, notnull, dflt_value, pk)` rows in declaration
495/// order, sorted defensively by `cid` so a downstream change to the
496/// PRAGMA's behaviour doesn't silently scramble field order.
497async fn introspect_columns(
498    pool: &SqlitePool,
499    table: &str,
500) -> Result<Vec<IntrospectedColumn>, InspectError> {
501    // The PRAGMA name can't be bound as a parameter, but it also can't
502    // contain user-supplied input here: `table` comes from `sqlite_master`
503    // and matches an existing table identifier by construction.
504    let sql = format!("PRAGMA table_info(\"{}\")", table.replace('"', "\"\""));
505    let mut rows = sqlx::query(&sql).fetch_all(pool).await?;
506    rows.sort_by_key(|r| r.try_get::<i64, _>("cid").unwrap_or(0));
507
508    let mut columns: Vec<IntrospectedColumn> = Vec::with_capacity(rows.len());
509    for row in rows {
510        let name: String = row.try_get("name")?;
511        let raw_type: String = row.try_get("type")?;
512        let notnull: i64 = row.try_get("notnull")?;
513        let pk: i64 = row.try_get("pk")?;
514        let ty = map_sqlite_type(&raw_type).ok_or_else(|| InspectError::UnsupportedColumnType {
515            table: table.to_string(),
516            column: name.clone(),
517            sql_type: raw_type.clone(),
518        })?;
519        let primary_key = pk != 0;
520        // SQLite's `PRAGMA table_info` reports `notnull = 0` for
521        // `INTEGER PRIMARY KEY` columns because they're aliases for
522        // ROWID (which SQLite manages internally). The columns are
523        // nonetheless guaranteed non-null: SQLite refuses to insert
524        // NULL into a primary key. Forcing `nullable = false` here
525        // makes the generated `#[derive(Model)]` compile (the M3
526        // derive's PK detection requires a non-`Option` PK field)
527        // and matches what the database actually enforces.
528        let nullable = if primary_key { false } else { notnull == 0 };
529        columns.push(IntrospectedColumn {
530            name,
531            ty,
532            primary_key,
533            nullable,
534        });
535    }
536    Ok(columns)
537}
538
539/// Map a raw SQLite type string to the M6 v1 [`SqlType`] catalogue.
540/// Case-insensitive; trailing `(n)` or `(p,s)` width parameters are
541/// stripped before matching so `VARCHAR(255)` and `NUMERIC(10,2)` come
542/// through as `varchar` and `numeric`. Returns `None` on anything not
543/// in the table; the caller turns that into
544/// [`InspectError::UnsupportedColumnType`] with the table and column
545/// names attached.
546fn map_sqlite_type(raw: &str) -> Option<SqlType> {
547    let head = match raw.split_once('(') {
548        Some((before, _)) => before,
549        None => raw,
550    };
551    let normalised = head.trim().to_ascii_lowercase();
552    match normalised.as_str() {
553        "smallint" | "int2" => Some(SqlType::SmallInt),
554        "int" | "integer" | "int4" => Some(SqlType::Integer),
555        "bigint" | "int8" => Some(SqlType::BigInt),
556        "real" | "float" | "float4" => Some(SqlType::Real),
557        "double" | "double precision" | "float8" => Some(SqlType::Double),
558        "boolean" | "bool" => Some(SqlType::Boolean),
559        "text" | "varchar" | "char" | "clob" | "character" | "varying character" | "nchar"
560        | "nvarchar" => Some(SqlType::Text),
561        "date" => Some(SqlType::Date),
562        "time" => Some(SqlType::Time),
563        "timestamp" | "timestamptz" | "datetime" => Some(SqlType::Timestamptz),
564        "uuid" => Some(SqlType::Uuid),
565        // SQLite doesn't have a native JSON column type, but a user
566        // declaring `CREATE TABLE t (data JSON)` parses the type-name
567        // verbatim into `sqlite_master` and `PRAGMA table_info`. Treat
568        // that as a hint that the column holds JSON content and route
569        // it through `SqlType::Json` (which lowers to TEXT on SQLite
570        // anyway).
571        "json" | "jsonb" => Some(SqlType::Json),
572        "blob" | "bytea" => Some(SqlType::Bytes),
573        _ => None,
574    }
575}
576
// `to_snake_case` (formerly the local `derive_table_name`) and
// `pascal_case_from_table` (formerly the local `pascal_case`) are
// imported from `umbral_casing` at the top of this file. The local
// copies were removed in the gaps2 #77 refactor.
580
581/// Render the introspected schema as the contents of a `models.rs`
582/// file. The output is one `#[derive(Model)]` struct per table, with
583/// fields in declaration order and the `#[umbral(table = "…")]`
584/// attribute set when the struct name differs from the SQL table.
585///
586/// Structs are emitted in alphabetical order by struct name so a
587/// re-run against an unchanged schema produces a byte-identical file.
588/// Field-type rendering uses fully-qualified `chrono::*` / `uuid::*`
589/// paths so no extra `use` lines are needed at the top of the file.
590pub fn render_models(schema: &IntrospectedSchema) -> String {
591    let mut out = String::new();
592    out.push_str(HEADER);
593
594    let mut tables: Vec<&IntrospectedTable> = schema.tables.iter().collect();
595    tables.sort_by(|a, b| a.name.cmp(&b.name));
596
597    for table in tables {
598        out.push('\n');
599        out.push_str(&render_one_struct(table));
600    }
601    out
602}
603
/// Preamble of every generated `models.rs`: a three-line module doc,
/// a blank line, and the single facade import the generated structs
/// rely on. Held in a constant so an empty schema renders as exactly
/// this text and nothing more.
const HEADER: &str = concat!(
    "//! Generated by `umbral inspectdb`. Wire each struct into your App\n",
    "//! builder with `.model::<StructName>()`. Re-run `inspectdb` to\n",
    "//! regenerate; edits made by hand will be lost.\n",
    "\n",
    "use umbral::prelude::*;\n",
);
614
615/// Render a single `#[derive(Model)]` struct for one introspected table.
616/// The `#[umbral(table = "...")]` attribute is emitted only when the
617/// derive's auto-derived table name (snake_case of the struct name)
618/// doesn't equal the SQL table name. For the typical snake_case shape
619/// (`blog_post` -> `BlogPost` -> derive computes `"blog_post"`), the
620/// attribute is redundant and is left off. For unusual SQL casings
621/// (`POSTS` -> `Posts` -> derive computes `"posts"` not `"POSTS"`),
622/// the attribute is emitted and the M3.1 derive picks it up to
623/// override the default. See `umbral-macros/src/lib.rs` for the
624/// attribute parser.
625fn render_one_struct(table: &IntrospectedTable) -> String {
626    let mut out = String::new();
627    // `sqlx::FromRow` is required because the `Model` trait bounds it
628    // as a supertrait (see `crates/umbral-core/src/orm/model.rs`).
629    // Without it, `#[derive(Model)]` emits an `impl Model` whose
630    // sqlx::FromRow supertrait isn't satisfied and the generated file
631    // fails to compile.
632    out.push_str("#[derive(Debug, Clone, sqlx::FromRow, Model)]\n");
633    if to_snake_case(&table.name) != table.table {
634        out.push_str(&format!("#[umbral(table = \"{}\")]\n", table.table));
635    }
636    out.push_str(&format!("pub struct {} {{\n", table.name));
637    for column in &table.columns {
638        out.push_str(&format!(
639            "    pub {}: {},\n",
640            column.name,
641            render_field_type(column.ty, column.nullable),
642        ));
643    }
644    out.push_str("}\n");
645    out
646}
647
648/// Map `(SqlType, nullable)` to the Rust type string the derive macro's
649/// `classify_field_type` accepts. Mirrors the table in
650/// `umbral-macros/src/lib.rs` (see `FieldKind` for the full catalogue).
651fn render_field_type(ty: SqlType, nullable: bool) -> String {
652    let base = match ty {
653        SqlType::SmallInt => "i16".to_string(),
654        SqlType::Integer => "i32".to_string(),
655        SqlType::BigInt => "i64".to_string(),
656        SqlType::Real => "f32".to_string(),
657        SqlType::Double => "f64".to_string(),
658        SqlType::Boolean => "bool".to_string(),
659        SqlType::Text => "String".to_string(),
660        SqlType::Date => "chrono::NaiveDate".to_string(),
661        SqlType::Time => "chrono::NaiveTime".to_string(),
662        SqlType::Timestamptz => "chrono::DateTime<chrono::Utc>".to_string(),
663        SqlType::Uuid => "uuid::Uuid".to_string(),
664        SqlType::Json => "serde_json::Value".to_string(),
665        // Recurse through the element's SqlType. Wrapping in `Vec<...>`
666        // matches the derive's catalogue: a `Vec<i64>` declares an
667        // `Array(ArrayElement::BigInt)` field.
668        SqlType::Array(elem) => format!("Vec<{}>", render_field_type(elem.to_sql_type(), false)),
669        // Phase 4.4: Postgres network address types. Both `Inet` and
670        // `Cidr` round-trip through `ipnetwork::IpNetwork`; `MacAddr`
671        // uses the `mac_address` crate.
672        SqlType::Inet => "ipnetwork::IpNetwork".to_string(),
673        SqlType::Cidr => "ipnetwork::IpNetwork".to_string(),
674        SqlType::MacAddr => "mac_address::MacAddress".to_string(),
675        // gaps2 #70: text-backed Postgres types surface as `String`.
676        // inspectdb can't recover which `#[umbral(...)]` attr produced
677        // the column (the attr lives only in the source model, not the
678        // DB), so the generated model is a plain `String`; the user
679        // re-adds `#[umbral(xml)]` / `#[umbral(ltree)]` / `#[umbral(bit)]`
680        // if they want the native type back on a re-migrate.
681        SqlType::Xml => "String".to_string(),
682        SqlType::Ltree => "String".to_string(),
683        SqlType::Bit => "String".to_string(),
684        SqlType::FullText => "umbral::orm::TsVector".to_string(),
685        // ForeignKey inspectdb renders as i64 for now; the FK relationship
686        // introspection that would emit ForeignKey<T> is deferred.
687        SqlType::ForeignKey => "i64".to_string(),
688        // BLOB / BYTEA columns surface as Vec<u8> in user code.
689        SqlType::Bytes => "Vec<u8>".to_string(),
690        // BUG-10: NUMERIC introspection renders as
691        // `rust_decimal::Decimal`. inspectdb reads the column type
692        // from Postgres' `information_schema`; the resulting
693        // model imports use this exact path.
694        SqlType::Decimal => "rust_decimal::Decimal".to_string(),
695    };
696    let base = base.as_str();
697    if nullable {
698        format!("Option<{base}>")
699    } else {
700        base.to_string()
701    }
702}
703
704/// Render the introspected schema as a [`MigrationFile`] suitable for
705/// writing to `migrations/<INSPECTED_PLUGIN_NAME>/0001_initial.json`.
706/// One `CreateTable` per introspected table; `snapshot_after` captures
707/// the imported state so subsequent `make_in` runs diff against it.
708///
709/// Filled in by subagent B.
710pub fn render_initial_migration(schema: &IntrospectedSchema) -> MigrationFile {
711    let mut models: Vec<ModelMeta> = schema
712        .tables
713        .iter()
714        .map(|t| ModelMeta {
715            name: t.name.clone(),
716            table: t.table.clone(),
717            fields: t.columns.iter().map(Column::from).collect(),
718            display: t.name.clone(),
719            icon: "database".to_string(),
720            database: None,
721            singleton: false,
722            unique_together: Vec::new(),
723            indexes: Vec::new(),
724            ordering: Vec::new(),
725            m2m_relations: Vec::new(),
726            soft_delete: false,
727            // inspectdb has no plugin attribute to read; default to "app".
728            app_label: "app".to_string(),
729        })
730        .collect();
731    models.sort_by(|a, b| a.name.cmp(&b.name));
732
733    let operations = schema
734        .tables
735        .iter()
736        .map(|t| Operation::CreateTable {
737            table: t.table.clone(),
738            columns: t.columns.iter().map(Column::from).collect(),
739            unique_together: Vec::new(),
740            indexes: Vec::new(),
741        })
742        .collect();
743
744    MigrationFile {
745        id: INITIAL_MIGRATION_ID.to_string(),
746        plugin: INSPECTED_PLUGIN_NAME.to_string(),
747        depends_on: Vec::new(),
748        operations,
749        snapshot_after: Snapshot { models },
750    }
751}
752
753/// Write `models.rs` and the initial migration to `output`. Creates
754/// `output/` and `output/migrations/<INSPECTED_PLUGIN_NAME>/` as
755/// needed. Returns the report carrying the table / column counts and
756/// the paths.
757///
758/// The migration is pretty-printed so the file diffs cleanly when a
759/// later `makemigrations` writes the next migration alongside.
760pub async fn write_outputs(
761    output: &Path,
762    models_src: &str,
763    migration: &MigrationFile,
764) -> Result<InspectReport, InspectError> {
765    std::fs::create_dir_all(output)?;
766
767    let models_path = output.join("models.rs");
768    std::fs::write(&models_path, models_src)?;
769
770    let plugin_dir = output.join("migrations").join(INSPECTED_PLUGIN_NAME);
771    std::fs::create_dir_all(&plugin_dir)?;
772
773    let migration_path = plugin_dir.join(format!("{}.json", migration.id));
774    let json = serde_json::to_string_pretty(migration)?;
775    std::fs::write(&migration_path, json)?;
776
777    let (tables, columns) =
778        migration
779            .operations
780            .iter()
781            .fold((0usize, 0usize), |(t, c), op| match op {
782                Operation::CreateTable { columns, .. } => (t + 1, c + columns.len()),
783                Operation::CreateM2MTable { .. } => (t + 1, c + 2),
784                Operation::DropTable { .. }
785                | Operation::DropM2MTable { .. }
786                | Operation::AddColumn { .. }
787                | Operation::DropColumn { .. }
788                | Operation::AlterColumn { .. }
789                | Operation::RenameTable { .. }
790                | Operation::RenameColumn { .. }
791                | Operation::RunSql { .. } => (t, c),
792            });
793
794    Ok(InspectReport {
795        tables,
796        columns,
797        models_path,
798        migration_path,
799    })
800}
801
802// =========================================================================
803// Internal helpers.
804// =========================================================================
805
806impl From<&IntrospectedColumn> for Column {
807    fn from(c: &IntrospectedColumn) -> Self {
808        Self {
809            name: c.name.clone(),
810            ty: c.ty,
811            primary_key: c.primary_key,
812            nullable: c.nullable,
813            fk_target: None,
814            noform: false,
815            // inspectdb introspects no FK yet (`fk_target: None`), so a
816            // real DB constraint maps to the default `true`.
817            db_constraint: true,
818            noedit: false,
819            is_string_repr: false,
820            max_length: 0,
821            choices: Vec::new(),
822            choice_labels: Vec::new(),
823            default: String::new(),
824            is_multichoice: false,
825            // inspectdb does not introspect UNIQUE constraints yet
826            // (gap #65 ships the declare-side first; inspect-side
827            // lands when there's a real porting case that needs it).
828            unique: false,
829            on_delete: crate::orm::FkAction::NoAction,
830            on_update: crate::orm::FkAction::NoAction,
831            index: false,
832            auto_now_add: false,
833            auto_now: false,
834            help: String::new(),
835            example: String::new(),
836            widget: None,
837            supported_backends: Vec::new(),
838            min: None,
839            max: None,
840            text_format: ::core::option::Option::None,
841            slug_from: ::core::option::Option::None,
842        }
843    }
844}
845
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building an [`IntrospectedColumn`] fixture.
    fn col(name: &str, ty: SqlType, primary_key: bool, nullable: bool) -> IntrospectedColumn {
        IntrospectedColumn {
            name: name.to_string(),
            ty,
            primary_key,
            nullable,
        }
    }

    #[test]
    fn empty_schema_renders_header_only() {
        let out = render_models(&IntrospectedSchema { tables: Vec::new() });
        assert_eq!(out, HEADER);
    }

    #[test]
    fn snake_case_table_skips_attribute_when_derive_round_trips() {
        let schema = IntrospectedSchema {
            tables: vec![IntrospectedTable {
                table: "blog_post".to_string(),
                name: "BlogPost".to_string(),
                columns: vec![
                    col("id", SqlType::BigInt, true, false),
                    col("title", SqlType::Text, false, false),
                ],
            }],
        };
        let out = render_models(&schema);
        // `BlogPost` snake_cases to `blog_post` via the derive, so the
        // attribute is redundant and is left off. This keeps the
        // generated file compatible with the M3 derive, which doesn't
        // yet recognise `#[umbral(...)]` attributes.
        assert!(!out.contains("#[umbral(table"));
        assert!(out.contains("pub struct BlogPost {"));
        assert!(out.contains("pub id: i64,"));
        assert!(out.contains("pub title: String,"));
    }

    #[test]
    fn lowercase_single_word_table_skips_attribute() {
        // `post` -> `Post` -> derive snake_cases to `"post"`, matches
        // the source table verbatim, so the attribute is left off.
        let schema = IntrospectedSchema {
            tables: vec![IntrospectedTable {
                table: "post".to_string(),
                name: "Post".to_string(),
                columns: vec![col("id", SqlType::BigInt, true, false)],
            }],
        };
        let out = render_models(&schema);
        assert!(!out.contains("#[umbral(table"));
        assert!(out.contains("pub struct Post {"));
    }

    #[test]
    fn non_round_tripping_table_name_keeps_attribute() {
        // SQL tables with names the derive's snake_case won't reach
        // (e.g. uppercase, runs of capitals, leading digits) need the
        // explicit attribute. This case is rare in real ports but
        // the renderer should still cover it for the derive's eventual
        // attribute-support landing.
        let schema = IntrospectedSchema {
            tables: vec![IntrospectedTable {
                table: "POSTS".to_string(),
                name: "Posts".to_string(),
                columns: vec![col("id", SqlType::BigInt, true, false)],
            }],
        };
        let out = render_models(&schema);
        assert!(out.contains("#[umbral(table = \"POSTS\")]"));
    }

    #[test]
    fn nullable_column_wraps_in_option() {
        let schema = IntrospectedSchema {
            tables: vec![IntrospectedTable {
                table: "post".to_string(),
                name: "Post".to_string(),
                columns: vec![
                    col("id", SqlType::BigInt, true, false),
                    col("published_at", SqlType::Timestamptz, false, true),
                ],
            }],
        };
        let out = render_models(&schema);
        assert!(out.contains("pub published_at: Option<chrono::DateTime<chrono::Utc>>,"));
    }

    #[test]
    fn type_catalogue_renders_each_sql_type() {
        let schema = IntrospectedSchema {
            tables: vec![IntrospectedTable {
                table: "kitchen_sink".to_string(),
                name: "KitchenSink".to_string(),
                columns: vec![
                    col("id", SqlType::BigInt, true, false),
                    col("small", SqlType::SmallInt, false, false),
                    col("medium", SqlType::Integer, false, false),
                    col("real_v", SqlType::Real, false, false),
                    col("double_v", SqlType::Double, false, false),
                    col("flag", SqlType::Boolean, false, false),
                    col("note", SqlType::Text, false, false),
                    col("day", SqlType::Date, false, false),
                    col("clock", SqlType::Time, false, false),
                    col("at", SqlType::Timestamptz, false, false),
                    col("uid", SqlType::Uuid, false, false),
                ],
            }],
        };
        let out = render_models(&schema);
        for expected in [
            "pub id: i64,",
            "pub small: i16,",
            "pub medium: i32,",
            "pub real_v: f32,",
            "pub double_v: f64,",
            "pub flag: bool,",
            "pub note: String,",
            "pub day: chrono::NaiveDate,",
            "pub clock: chrono::NaiveTime,",
            "pub at: chrono::DateTime<chrono::Utc>,",
            "pub uid: uuid::Uuid,",
        ] {
            assert!(out.contains(expected), "missing field render: {expected}");
        }
    }

    /// The variants beyond the M5 catalogue each have an arm in
    /// `render_field_type`; pin the exact Rust path every one renders
    /// to, both plain and wrapped in `Option<...>`, so a renamed path
    /// can't slip into generated models unnoticed.
    #[test]
    fn extended_catalogue_renders_each_sql_type() {
        for (ty, expected) in [
            (SqlType::Json, "serde_json::Value"),
            (SqlType::Inet, "ipnetwork::IpNetwork"),
            (SqlType::Cidr, "ipnetwork::IpNetwork"),
            (SqlType::MacAddr, "mac_address::MacAddress"),
            (SqlType::Xml, "String"),
            (SqlType::Ltree, "String"),
            (SqlType::Bit, "String"),
            (SqlType::FullText, "umbral::orm::TsVector"),
            (SqlType::ForeignKey, "i64"),
            (SqlType::Bytes, "Vec<u8>"),
            (SqlType::Decimal, "rust_decimal::Decimal"),
        ] {
            assert_eq!(
                render_field_type(ty, false),
                expected,
                "wrong non-null render for {expected}",
            );
            assert_eq!(
                render_field_type(ty, true),
                format!("Option<{expected}>"),
                "wrong nullable render for {expected}",
            );
        }
    }

    #[test]
    fn structs_are_sorted_by_name() {
        let schema = IntrospectedSchema {
            tables: vec![
                IntrospectedTable {
                    table: "zebra".to_string(),
                    name: "Zebra".to_string(),
                    columns: vec![col("id", SqlType::BigInt, true, false)],
                },
                IntrospectedTable {
                    table: "antelope".to_string(),
                    name: "Antelope".to_string(),
                    columns: vec![col("id", SqlType::BigInt, true, false)],
                },
            ],
        };
        let out = render_models(&schema);
        let antelope_at = out.find("struct Antelope").expect("Antelope rendered");
        let zebra_at = out.find("struct Zebra").expect("Zebra rendered");
        assert!(antelope_at < zebra_at);
    }

    #[test]
    fn header_carries_the_regen_warning_and_facade_import() {
        let out = render_models(&IntrospectedSchema { tables: Vec::new() });
        assert!(out.contains("Generated by `umbral inspectdb`"));
        assert!(out.contains("edits made by hand will be lost"));
        assert!(out.contains("use umbral::prelude::*;"));
    }

    // --------------------------------------------------------------- //
    // Postgres type-mapping coverage (Phase 3).                        //
    // --------------------------------------------------------------- //

    /// Every variant of the M5 SqlType catalogue has a mapping from
    /// the canonical Postgres `information_schema.columns.data_type`
    /// value back to the variant. Lockstep with
    /// `crate::backend::PostgresBackend::map_type` — if a SqlType
    /// variant lands, both `map_type` (outbound) and `map_postgres_type`
    /// (inbound) need an arm.
    #[test]
    fn map_postgres_type_covers_the_full_catalogue() {
        assert_eq!(map_postgres_type("smallint"), Some(SqlType::SmallInt));
        assert_eq!(map_postgres_type("integer"), Some(SqlType::Integer));
        assert_eq!(map_postgres_type("bigint"), Some(SqlType::BigInt));
        assert_eq!(map_postgres_type("real"), Some(SqlType::Real));
        assert_eq!(map_postgres_type("double precision"), Some(SqlType::Double));
        assert_eq!(map_postgres_type("boolean"), Some(SqlType::Boolean));
        assert_eq!(map_postgres_type("text"), Some(SqlType::Text));
        assert_eq!(
            map_postgres_type("character varying"),
            Some(SqlType::Text),
            "VARCHAR maps to Text",
        );
        assert_eq!(
            map_postgres_type("character"),
            Some(SqlType::Text),
            "CHAR maps to Text",
        );
        assert_eq!(map_postgres_type("date"), Some(SqlType::Date));
        assert_eq!(
            map_postgres_type("time without time zone"),
            Some(SqlType::Time),
        );
        assert_eq!(
            map_postgres_type("time with time zone"),
            Some(SqlType::Time)
        );
        assert_eq!(
            map_postgres_type("timestamp without time zone"),
            Some(SqlType::Timestamptz),
        );
        assert_eq!(
            map_postgres_type("timestamp with time zone"),
            Some(SqlType::Timestamptz),
        );
        assert_eq!(map_postgres_type("uuid"), Some(SqlType::Uuid));
        // Phase 4: both `json` and `jsonb` round-trip to the portable
        // `SqlType::Json` (DDL renders as `jsonb` on Postgres, TEXT on
        // SQLite).
        assert_eq!(map_postgres_type("json"), Some(SqlType::Json));
        assert_eq!(map_postgres_type("jsonb"), Some(SqlType::Json));
        // Phase 4.4: Postgres network address types.
        assert_eq!(map_postgres_type("inet"), Some(SqlType::Inet));
        assert_eq!(map_postgres_type("cidr"), Some(SqlType::Cidr));
        assert_eq!(map_postgres_type("macaddr"), Some(SqlType::MacAddr));
        // BLOB / BYTEA — Vec<u8> in Rust.
        assert_eq!(map_postgres_type("bytea"), Some(SqlType::Bytes));
    }

    /// Postgres-specific types umbral doesn't map yet surface as
    /// `None` so the caller produces `UnsupportedColumnType` with the
    /// raw type string preserved. The lookups asserted here are
    /// `numeric` and `ARRAY`. The user fixes by hand or waits for the
    /// catalogue to grow.
    ///
    /// Types that used to be on this list and have since left it:
    /// `json`/`jsonb` (Phase 4's `SqlType::Json`),
    /// `inet`/`cidr`/`macaddr` (Phase 4.4) and `bytea`
    /// (`SqlType::Bytes`). Their arms in `map_postgres_type` are
    /// covered by `map_postgres_type_covers_the_full_catalogue` above.
    #[test]
    fn map_postgres_type_returns_none_for_postgres_only_types() {
        // NOTE(review): `SqlType::Decimal` exists and `render_field_type`
        // renders it as `rust_decimal::Decimal`; confirm `numeric` is
        // still intentionally unmapped on the inbound side.
        assert_eq!(map_postgres_type("numeric"), None);
        assert_eq!(map_postgres_type("ARRAY"), None);
    }

    /// The mapping is case-insensitive on the input but matches against
    /// the canonical lowercase form information_schema reports. Whether
    /// the operator's DB returns `INTEGER` (uppercase, from a quoted
    /// type) or `integer` shouldn't matter.
    #[test]
    fn map_postgres_type_is_case_insensitive_on_input() {
        assert_eq!(map_postgres_type("INTEGER"), Some(SqlType::Integer));
        assert_eq!(map_postgres_type("Bigint"), Some(SqlType::BigInt));
        assert_eq!(map_postgres_type("UUID"), Some(SqlType::Uuid));
    }

    /// Surrounding whitespace doesn't break the lookup. Trimming
    /// matches `map_sqlite_type`'s `trim()`; both functions parse
    /// values straight from a sqlx row and the trim is a cheap
    /// safety net.
    #[test]
    fn map_postgres_type_trims_whitespace() {
        assert_eq!(map_postgres_type("  bigint  "), Some(SqlType::BigInt));
    }
}
umbral_core/inspect.rs

umbral_core/
inspect.rs