umbral_core/inspect.rs
1//! `inspectdb` — introspect an existing database into umbral models.
2//!
3//! The porting payoff. A team with an existing
4//! SQLite database points `inspectdb` at it and gets a `models.rs`
5//! with `#[derive(Model)]` structs plus a `0001_initial.json`
6//! migration carrying one `CreateTable` op per table. The migration
7//! is recorded as applied in `umbral_migrations` so the next `migrate`
8//! is a no-op until the user actually changes a model.
9//!
10//! After that, the introspected schema enters the M5 declare →
11//! migrate → change → migrate loop with no separate code path.
12//!
13//! ## Backend coverage
14//!
15//! - **SQLite (M6 v1).** [`introspect_pool`] reads `sqlite_master` for
16//! table names and `PRAGMA table_info` for column descriptors.
17//! - **Postgres (Phase 3 of the rollout).** [`introspect_pool_pg`]
18//! reads `information_schema.tables` / `information_schema.columns`
19//! and joins `information_schema.table_constraints` + `key_column_usage`
20//! for primary keys. Same `IntrospectedSchema` output; the
21//! downstream pipeline (`render_models` / `render_initial_migration`
22//! / `write_outputs`) is backend-agnostic.
23//!
24//! ## M6 v1 scope
25//!
26//! - **Output.** A flat `models.rs` plus `migrations/0001_initial.json`
27//! in the user-chosen output directory. No `Cargo.toml`, no `lib.rs`
28//! with a `Plugin` impl: the plugin trait isn't shipped until M7,
29//! so M6 v1 leaves the wiring (one `mod models;` plus one
30//! `.model::<T>()` per generated struct) to the user. M7 turns the
31//! output into a self-contained plugin crate.
32//! - **Type mapping.** Covers the M5 [`SqlType`] catalogue
33//! (integers, floats, bool, text, date / time / timestamptz, uuid)
34//! plus their nullable variants. Anything else (NUMERIC, JSON,
35//! BYTEA, arrays, custom types) returns
36//! [`InspectError::UnsupportedColumnType`] with the table / column
37//! names; the user fixes by-hand or waits for the field-type
38//! catalogue to grow.
39//! - **FKs and indexes.** Not yet read out. The CreateTable op carries
40//! columns only; FK / index detection lands with the field-level
41//! support in [`crate::orm`].
42//!
43//! See [`docs/specs/07-inspectdb.md`] for the eventual target shape
44//! and the deferred items.
45//!
46//! [`DatabaseBackend`]: crate::backend::DatabaseBackend
47//! [`SqlType`]: crate::orm::SqlType
48
49use std::path::{Path, PathBuf};
50
51use sqlx::{PgPool, Row, SqlitePool};
52use umbral_casing::{pascal_case_from_table, to_snake_case};
53
54use crate::migrate::{self, Column, MigrationFile, ModelMeta, Operation, Snapshot};
55use crate::orm::SqlType;
56
57/// Default plugin name the generated migration is filed under. Matches
58/// [`crate::migrate::APP_PLUGIN_NAME`] so the produced
59/// `0001_initial.json` lands inside the same `migrations/app/`
60/// directory the M5 engine reads from. M7 lifts this once the user can
61/// choose a real plugin name via `--plugin`.
62pub const INSPECTED_PLUGIN_NAME: &str = migrate::APP_PLUGIN_NAME;
63
64/// Default filename for the introspected initial migration.
65pub const INITIAL_MIGRATION_ID: &str = "0001_initial";
66
67/// The introspection result. A flat list of tables, each with its
68/// columns in declaration order. Indexes and foreign keys are omitted
69/// at M6 v1 (the field types they target don't exist yet).
70#[derive(Debug, Clone, PartialEq, Eq)]
71pub struct IntrospectedSchema {
72 pub tables: Vec<IntrospectedTable>,
73}
74
75/// One introspected table.
76#[derive(Debug, Clone, PartialEq, Eq)]
77pub struct IntrospectedTable {
78 /// The SQL table name as it appears in the database.
79 pub table: String,
80 /// The struct name the renderer will use. Defaults to the table
81 /// name in UpperCamelCase; the M6 v1 importer does not strip
82 /// prefixes (deferred to M7's `--strip-prefix` flag).
83 pub name: String,
84 /// One descriptor per column, in declaration order.
85 pub columns: Vec<IntrospectedColumn>,
86}
87
88/// One introspected column.
89#[derive(Debug, Clone, PartialEq, Eq)]
90pub struct IntrospectedColumn {
91 pub name: String,
92 pub ty: SqlType,
93 pub primary_key: bool,
94 pub nullable: bool,
95}
96
97/// Errors `inspectdb` can produce. Carries enough detail for the CLI
98/// to print a single-line diagnostic with the offending table and
99/// column.
100#[derive(Debug)]
101pub enum InspectError {
102 /// IO error reading or writing a generated file.
103 Io(std::io::Error),
104 /// JSON serialisation error pretty-printing the generated migration.
105 Json(serde_json::Error),
106 /// sqlx error executing the introspection queries.
107 Sqlx(sqlx::Error),
108 /// The introspection ran but found no tables. Surfaced so the CLI
109 /// can print "nothing to import" instead of writing empty files.
110 NoTables,
111 /// A column's SQL type isn't in the M6 v1 mapping table. Holds the
112 /// table / column / raw SQL type so the user can decide whether to
113 /// add a field type, edit the generated code, or wait for the
114 /// catalogue to grow.
115 UnsupportedColumnType {
116 table: String,
117 column: String,
118 sql_type: String,
119 },
120 /// Pass-through for migration-engine failures (e.g. recording the
121 /// initial migration as applied).
122 Migrate(migrate::MigrateError),
123}
124
125impl std::fmt::Display for InspectError {
126 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
127 match self {
128 InspectError::Io(e) => write!(f, "umbral inspectdb: io: {e}"),
129 InspectError::Json(e) => write!(f, "umbral inspectdb: json: {e}"),
130 InspectError::Sqlx(e) => write!(f, "umbral inspectdb: sqlx: {e}"),
131 InspectError::NoTables => write!(
132 f,
133 "umbral inspectdb: no tables found in the database (nothing to import)"
134 ),
135 InspectError::UnsupportedColumnType {
136 table,
137 column,
138 sql_type,
139 } => write!(
140 f,
141 "umbral inspectdb: column `{table}.{column}` has unsupported SQL type `{sql_type}`; \
142 add a matching SqlType variant or edit the generated model by hand"
143 ),
144 InspectError::Migrate(e) => write!(f, "umbral inspectdb: migrate: {e}"),
145 }
146 }
147}
148
149impl std::error::Error for InspectError {}
150
151impl From<std::io::Error> for InspectError {
152 fn from(e: std::io::Error) -> Self {
153 Self::Io(e)
154 }
155}
156
157impl From<sqlx::Error> for InspectError {
158 fn from(e: sqlx::Error) -> Self {
159 Self::Sqlx(e)
160 }
161}
162
163impl From<serde_json::Error> for InspectError {
164 fn from(e: serde_json::Error) -> Self {
165 Self::Json(e)
166 }
167}
168
169impl From<migrate::MigrateError> for InspectError {
170 fn from(e: migrate::MigrateError) -> Self {
171 Self::Migrate(e)
172 }
173}
174
175/// CLI-driven options. The CLI subcommand wires its flags into this
176/// struct and hands it to [`inspectdb`].
177#[derive(Debug, Clone)]
178pub struct InspectOptions {
179 /// Directory the generated files are written under. `models.rs`
180 /// lands at the root; the migration lands at
181 /// `<output>/migrations/<INSPECTED_PLUGIN_NAME>/0001_initial.json`.
182 pub output: PathBuf,
183 /// Mark `0001_initial` as applied in `umbral_migrations` after
184 /// writing it. The right default when the target database already
185 /// has tables (running the migration would fail). Off for empty
186 /// databases.
187 pub mark_applied: bool,
188}
189
190/// Summary returned to the CLI. Counts that the caller can render as a
191/// one-line "imported N tables / M columns" message.
192#[derive(Debug, Clone, Default)]
193pub struct InspectReport {
194 pub tables: usize,
195 pub columns: usize,
196 pub models_path: PathBuf,
197 pub migration_path: PathBuf,
198}
199
200// =========================================================================
201// Top-level entry points. Bodies filled in by the M6 fan-out subagents.
202// =========================================================================
203
204/// Run the full `inspectdb` pipeline against the ambient pool:
205/// introspect (dispatching on the active backend), render `models.rs`,
206/// render `0001_initial.json`, write both to `opts.output`, and
207/// optionally mark applied.
208///
209/// Phase 3 of the Postgres rollout taught this entry point to dispatch
210/// on `DbPool` — the SQLite path uses `PRAGMA table_info`; the
211/// Postgres path uses `information_schema`. The downstream pipeline
212/// (rendering + writing) is backend-agnostic and runs the same way.
213pub async fn inspectdb(opts: InspectOptions) -> Result<InspectReport, InspectError> {
214 let schema = match crate::db::pool_dispatched() {
215 crate::db::DbPool::Sqlite(pool) => introspect_pool(pool).await?,
216 crate::db::DbPool::Postgres(pool) => introspect_pool_pg(pool).await?,
217 };
218 if schema.tables.is_empty() {
219 return Err(InspectError::NoTables);
220 }
221
222 let models_src = render_models(&schema);
223 let migration = render_initial_migration(&schema);
224 let report = write_outputs(&opts.output, &models_src, &migration).await?;
225
226 if opts.mark_applied {
227 let hash = migration.snapshot_after.hash();
228 migrate::record_applied(&migration.plugin, &migration.id, &hash).await?;
229 }
230
231 Ok(report)
232}
233
234/// Introspect the schema reachable through the given SQLite pool.
235/// Reads `sqlite_master` for table names and `PRAGMA table_info(...)`
236/// for column descriptors. Skips internal tables (`sqlite_*`,
237/// `umbral_migrations`).
238pub async fn introspect_pool(pool: &SqlitePool) -> Result<IntrospectedSchema, InspectError> {
239 // List user tables in lexical name order. `sqlite_master` carries
240 // both tables and indexes; the `type = 'table'` predicate scopes the
241 // result to tables. The skip-list takes out SQLite's internal
242 // bookkeeping (`sqlite_%`) and umbral's own tracking table, which
243 // would otherwise loop back through the migration engine.
244 let table_rows = sqlx::query(
245 "SELECT name FROM sqlite_master \
246 WHERE type = 'table' \
247 AND name NOT LIKE 'sqlite_%' \
248 AND name <> 'umbral_migrations' \
249 ORDER BY name",
250 )
251 .fetch_all(pool)
252 .await?;
253
254 let mut tables: Vec<IntrospectedTable> = Vec::with_capacity(table_rows.len());
255 for row in table_rows {
256 let table: String = row.try_get("name")?;
257 let columns = introspect_columns(pool, &table).await?;
258 tables.push(IntrospectedTable {
259 name: pascal_case_from_table(&table),
260 table,
261 columns,
262 });
263 }
264
265 Ok(IntrospectedSchema { tables })
266}
267
268/// Introspect the schema reachable through the given Postgres pool.
269/// Reads `information_schema.tables` for table names,
270/// `information_schema.columns` for column descriptors, and joins
271/// `information_schema.table_constraints` + `key_column_usage` for
272/// the primary-key flag. Scoped to the `public` schema by default;
273/// internal Postgres schemas and umbral's own `umbral_migrations`
274/// tracking table are skipped.
275///
276/// The output is the same `IntrospectedSchema` the SQLite path
277/// produces — downstream rendering doesn't know which backend the
278/// data came from.
279pub async fn introspect_pool_pg(pool: &PgPool) -> Result<IntrospectedSchema, InspectError> {
280 // List user tables in the `public` schema, lexically. Postgres
281 // information_schema is standard SQL; pg_catalog is the lower-
282 // level surface but information_schema is portable across
283 // Postgres-compatible servers and carries everything the
284 // SqlType catalogue needs.
285 let table_rows: Vec<(String,)> = sqlx::query_as(
286 "SELECT table_name FROM information_schema.tables \
287 WHERE table_schema = 'public' \
288 AND table_type = 'BASE TABLE' \
289 AND table_name <> 'umbral_migrations' \
290 ORDER BY table_name",
291 )
292 .fetch_all(pool)
293 .await?;
294
295 let mut tables: Vec<IntrospectedTable> = Vec::with_capacity(table_rows.len());
296 for (table,) in table_rows {
297 let columns = introspect_columns_pg(pool, &table).await?;
298 tables.push(IntrospectedTable {
299 name: pascal_case_from_table(&table),
300 table,
301 columns,
302 });
303 }
304
305 Ok(IntrospectedSchema { tables })
306}
307
308/// Read one Postgres table's columns via `information_schema.columns`,
309/// plus a primary-key join over `information_schema.table_constraints`
310/// and `key_column_usage`. Columns come back in declaration order
311/// (`ordinal_position`).
312///
313/// `data_type` is the normalised type string Postgres exposes through
314/// information_schema (e.g. `"integer"`, `"character varying"`,
315/// `"timestamp with time zone"`); [`map_postgres_type`] maps it to the
316/// umbral `SqlType` catalogue. Anything unmapped surfaces as
317/// [`InspectError::UnsupportedColumnType`] with the table / column
318/// names and the raw type string.
319async fn introspect_columns_pg(
320 pool: &PgPool,
321 table: &str,
322) -> Result<Vec<IntrospectedColumn>, InspectError> {
323 // The primary-key lookup runs once per table. The set is typically
324 // tiny (one column for most tables, a handful for composite keys)
325 // so collecting it up-front into a Vec keeps the inner column loop
326 // O(columns × pk_columns) without an extra round trip per column.
327 let pk_rows: Vec<(String,)> = sqlx::query_as(
328 "SELECT kcu.column_name \
329 FROM information_schema.table_constraints tc \
330 JOIN information_schema.key_column_usage kcu \
331 ON tc.constraint_name = kcu.constraint_name \
332 AND tc.table_schema = kcu.table_schema \
333 WHERE tc.constraint_type = 'PRIMARY KEY' \
334 AND tc.table_schema = 'public' \
335 AND tc.table_name = $1",
336 )
337 .bind(table)
338 .fetch_all(pool)
339 .await?;
340 let pk_columns: std::collections::HashSet<String> = pk_rows.into_iter().map(|(c,)| c).collect();
341
342 // `udt_name` carries the underlying type name even when `data_type`
343 // is the abstract `"ARRAY"` placeholder. For `bigint[]` the
344 // information_schema reports data_type = "ARRAY" and udt_name =
345 // "_int8" (underscore prefix marks the array variant in pg_type).
346 // For non-array columns udt_name carries the same physical name
347 // (`int8`, `text`, etc.) but `data_type` is the canonical match
348 // key we already lookup against.
349 let column_rows: Vec<(String, String, String, String)> = sqlx::query_as(
350 "SELECT column_name, data_type, is_nullable, udt_name \
351 FROM information_schema.columns \
352 WHERE table_schema = 'public' AND table_name = $1 \
353 ORDER BY ordinal_position",
354 )
355 .bind(table)
356 .fetch_all(pool)
357 .await?;
358
359 let mut columns: Vec<IntrospectedColumn> = Vec::with_capacity(column_rows.len());
360 for (name, data_type, is_nullable, udt_name) in column_rows {
361 let ty = if data_type.eq_ignore_ascii_case("ARRAY") {
362 // Element type comes from udt_name with the leading
363 // underscore stripped. `_int8` -> int8 -> ArrayElement::BigInt.
364 let elem_name = udt_name.strip_prefix('_').unwrap_or(udt_name.as_str());
365 map_postgres_array_element(elem_name).ok_or_else(|| {
366 InspectError::UnsupportedColumnType {
367 table: table.to_string(),
368 column: name.clone(),
369 sql_type: format!("ARRAY of {elem_name}"),
370 }
371 })?
372 } else {
373 map_postgres_type(&data_type).ok_or_else(|| InspectError::UnsupportedColumnType {
374 table: table.to_string(),
375 column: name.clone(),
376 sql_type: data_type.clone(),
377 })?
378 };
379 let primary_key = pk_columns.contains(&name);
380 // Postgres `is_nullable` is the string "YES" or "NO". A primary
381 // key is non-nullable by definition (the server enforces it);
382 // we force `nullable = false` so a SERIAL/BIGSERIAL PK round-
383 // trips through the M3 derive (which rejects `Option<T>` PKs)
384 // matching the behavioural fix already in place on the SQLite
385 // path.
386 let nullable = if primary_key {
387 false
388 } else {
389 is_nullable.eq_ignore_ascii_case("YES")
390 };
391 columns.push(IntrospectedColumn {
392 name,
393 ty,
394 primary_key,
395 nullable,
396 });
397 }
398
399 Ok(columns)
400}
401
402/// Map a Postgres array's element-type name (from `udt_name` with the
403/// leading underscore stripped) to a [`SqlType::Array`] variant.
404///
405/// The `udt_name` column on `information_schema.columns` carries the
406/// physical type name from `pg_catalog.pg_type`; array variants are
407/// prefixed with `_` (`_int8` for `bigint[]`, `_text` for `text[]`).
408/// The caller strips the prefix; this function maps the remaining
409/// stem to the umbral `ArrayElement` catalogue.
410///
411/// Returns `None` if the element type isn't in
412/// `umbral::orm::ArrayElement` — chrono types, JSON, network types,
413/// and Postgres-specific types like NUMERIC fall outside Phase 4.1's
414/// array catalogue.
415fn map_postgres_array_element(elem: &str) -> Option<SqlType> {
416 use crate::orm::ArrayElement;
417 let kind = match elem.trim().to_ascii_lowercase().as_str() {
418 // Postgres physical type names (per pg_type.typname). The
419 // information_schema strips spaces from the data_type alias
420 // form, so we match the canonical lowercase names here.
421 "int2" => ArrayElement::SmallInt,
422 "int4" => ArrayElement::Integer,
423 "int8" => ArrayElement::BigInt,
424 "float4" => ArrayElement::Real,
425 "float8" => ArrayElement::Double,
426 "bool" => ArrayElement::Boolean,
427 "text" | "varchar" | "bpchar" => ArrayElement::Text,
428 "uuid" => ArrayElement::Uuid,
429 _ => return None,
430 };
431 Some(SqlType::Array(kind))
432}
433
434/// Map a Postgres `information_schema.columns.data_type` value to the
435/// umbral `SqlType` catalogue. Postgres normalises the strings, so the
436/// match table is the canonical names rather than the optional aliases
437/// `pg_type.typname` would expose. The inverse of
438/// [`crate::backend::PostgresBackend::map_type`] — both stay in sync
439/// as new `SqlType` variants land.
440///
441/// Returns `None` on anything not in the catalogue (Postgres-specific
442/// types like `numeric`, `jsonb`, `bytea`, arrays, custom domains).
443/// The caller turns that into `UnsupportedColumnType` with enough
444/// context for the operator to fix by hand or wait for the field-
445/// type catalogue to grow.
446fn map_postgres_type(raw: &str) -> Option<SqlType> {
447 let normalised = raw.trim().to_ascii_lowercase();
448 match normalised.as_str() {
449 "smallint" => Some(SqlType::SmallInt),
450 "integer" => Some(SqlType::Integer),
451 "bigint" => Some(SqlType::BigInt),
452 "real" => Some(SqlType::Real),
453 "double precision" => Some(SqlType::Double),
454 "boolean" => Some(SqlType::Boolean),
455 // information_schema reports `text`, `character varying`, and
456 // `character` for VARCHAR / CHAR / TEXT. All round-trip through
457 // umbral's Text variant.
458 "text" | "character varying" | "character" => Some(SqlType::Text),
459 "date" => Some(SqlType::Date),
460 // Both timezone variants of TIME land on umbral's Time. The
461 // distinction is preserved in the database; the client-side
462 // type system doesn't model it yet.
463 "time without time zone" | "time with time zone" => Some(SqlType::Time),
464 // Likewise both timezone variants of TIMESTAMP land on
465 // Timestamptz. The umbral catalogue picks the with-tz variant
466 // as the default so chrono::DateTime<Utc> is the natural Rust
467 // type for either.
468 "timestamp without time zone" | "timestamp with time zone" => Some(SqlType::Timestamptz),
469 "uuid" => Some(SqlType::Uuid),
470 // Both `json` and `jsonb` round-trip to umbral's portable Json
471 // variant. The DDL renderer chose `jsonb` on the way out; if a
472 // pre-existing database stores values as `json` (the unindexed
473 // text variant), inspectdb still recognises it on the way in.
474 // A re-migrate would normalize to `jsonb` if the user re-creates
475 // the column, which matches the M5 declare-and-migrate loop.
476 "json" | "jsonb" => Some(SqlType::Json),
477 // Phase 4.4: Postgres network address types.
478 "inet" => Some(SqlType::Inet),
479 "cidr" => Some(SqlType::Cidr),
480 "macaddr" => Some(SqlType::MacAddr),
481 // gaps2 #70: text-backed Postgres types. `bit varying` and bare
482 // `bit` (the information_schema sometimes reports `bit` for a
483 // BIT(n)) both round-trip to the `Bit` variant.
484 "xml" => Some(SqlType::Xml),
485 "ltree" => Some(SqlType::Ltree),
486 "bit" | "bit varying" | "varbit" => Some(SqlType::Bit),
487 "tsvector" => Some(SqlType::FullText),
488 "bytea" => Some(SqlType::Bytes),
489 _ => None,
490 }
491}
492
493/// Read one table's columns via `PRAGMA table_info`. The PRAGMA returns
494/// `(cid, name, type, notnull, dflt_value, pk)` rows in declaration
495/// order, sorted defensively by `cid` so a downstream change to the
496/// PRAGMA's behaviour doesn't silently scramble field order.
497async fn introspect_columns(
498 pool: &SqlitePool,
499 table: &str,
500) -> Result<Vec<IntrospectedColumn>, InspectError> {
501 // The PRAGMA name can't be bound as a parameter, but it also can't
502 // contain user-supplied input here: `table` comes from `sqlite_master`
503 // and matches an existing table identifier by construction.
504 let sql = format!("PRAGMA table_info(\"{}\")", table.replace('"', "\"\""));
505 let mut rows = sqlx::query(&sql).fetch_all(pool).await?;
506 rows.sort_by_key(|r| r.try_get::<i64, _>("cid").unwrap_or(0));
507
508 let mut columns: Vec<IntrospectedColumn> = Vec::with_capacity(rows.len());
509 for row in rows {
510 let name: String = row.try_get("name")?;
511 let raw_type: String = row.try_get("type")?;
512 let notnull: i64 = row.try_get("notnull")?;
513 let pk: i64 = row.try_get("pk")?;
514 let ty = map_sqlite_type(&raw_type).ok_or_else(|| InspectError::UnsupportedColumnType {
515 table: table.to_string(),
516 column: name.clone(),
517 sql_type: raw_type.clone(),
518 })?;
519 let primary_key = pk != 0;
520 // SQLite's `PRAGMA table_info` reports `notnull = 0` for
521 // `INTEGER PRIMARY KEY` columns because they're aliases for
522 // ROWID (which SQLite manages internally). The columns are
523 // nonetheless guaranteed non-null: SQLite refuses to insert
524 // NULL into a primary key. Forcing `nullable = false` here
525 // makes the generated `#[derive(Model)]` compile (the M3
526 // derive's PK detection requires a non-`Option` PK field)
527 // and matches what the database actually enforces.
528 let nullable = if primary_key { false } else { notnull == 0 };
529 columns.push(IntrospectedColumn {
530 name,
531 ty,
532 primary_key,
533 nullable,
534 });
535 }
536 Ok(columns)
537}
538
539/// Map a raw SQLite type string to the M6 v1 [`SqlType`] catalogue.
540/// Case-insensitive; trailing `(n)` or `(p,s)` width parameters are
541/// stripped before matching so `VARCHAR(255)` and `NUMERIC(10,2)` come
542/// through as `varchar` and `numeric`. Returns `None` on anything not
543/// in the table; the caller turns that into
544/// [`InspectError::UnsupportedColumnType`] with the table and column
545/// names attached.
546fn map_sqlite_type(raw: &str) -> Option<SqlType> {
547 let head = match raw.split_once('(') {
548 Some((before, _)) => before,
549 None => raw,
550 };
551 let normalised = head.trim().to_ascii_lowercase();
552 match normalised.as_str() {
553 "smallint" | "int2" => Some(SqlType::SmallInt),
554 "int" | "integer" | "int4" => Some(SqlType::Integer),
555 "bigint" | "int8" => Some(SqlType::BigInt),
556 "real" | "float" | "float4" => Some(SqlType::Real),
557 "double" | "double precision" | "float8" => Some(SqlType::Double),
558 "boolean" | "bool" => Some(SqlType::Boolean),
559 "text" | "varchar" | "char" | "clob" | "character" | "varying character" | "nchar"
560 | "nvarchar" => Some(SqlType::Text),
561 "date" => Some(SqlType::Date),
562 "time" => Some(SqlType::Time),
563 "timestamp" | "timestamptz" | "datetime" => Some(SqlType::Timestamptz),
564 "uuid" => Some(SqlType::Uuid),
565 // SQLite doesn't have a native JSON column type, but a user
566 // declaring `CREATE TABLE t (data JSON)` parses the type-name
567 // verbatim into `sqlite_master` and `PRAGMA table_info`. Treat
568 // that as a hint that the column holds JSON content and route
569 // it through `SqlType::Json` (which lowers to TEXT on SQLite
570 // anyway).
571 "json" | "jsonb" => Some(SqlType::Json),
572 "blob" | "bytea" => Some(SqlType::Bytes),
573 _ => None,
574 }
575}
576
577// `derive_table_name` (was `to_snake_case`) and `pascal_case` (now
578// `pascal_case_from_table`) are imported from `umbral_casing` at the top
579// of this file. The local copies were removed in the gaps2 #77 refactor.
580
581/// Render the introspected schema as the contents of a `models.rs`
582/// file. The output is one `#[derive(Model)]` struct per table, with
583/// fields in declaration order and the `#[umbral(table = "…")]`
584/// attribute set when the struct name differs from the SQL table.
585///
586/// Structs are emitted in alphabetical order by struct name so a
587/// re-run against an unchanged schema produces a byte-identical file.
588/// Field-type rendering uses fully-qualified `chrono::*` / `uuid::*`
589/// paths so no extra `use` lines are needed at the top of the file.
590pub fn render_models(schema: &IntrospectedSchema) -> String {
591 let mut out = String::new();
592 out.push_str(HEADER);
593
594 let mut tables: Vec<&IntrospectedTable> = schema.tables.iter().collect();
595 tables.sort_by(|a, b| a.name.cmp(&b.name));
596
597 for table in tables {
598 out.push('\n');
599 out.push_str(&render_one_struct(table));
600 }
601 out
602}
603
604/// Two-line module doc plus the single facade import every generated
605/// file needs. Kept as a constant so the empty-schema path emits
606/// exactly the header and nothing else.
607const HEADER: &str = "\
608//! Generated by `umbral inspectdb`. Wire each struct into your App
609//! builder with `.model::<StructName>()`. Re-run `inspectdb` to
610//! regenerate; edits made by hand will be lost.
611
612use umbral::prelude::*;
613";
614
615/// Render a single `#[derive(Model)]` struct for one introspected table.
616/// The `#[umbral(table = "...")]` attribute is emitted only when the
617/// derive's auto-derived table name (snake_case of the struct name)
618/// doesn't equal the SQL table name. For the typical snake_case shape
619/// (`blog_post` -> `BlogPost` -> derive computes `"blog_post"`), the
620/// attribute is redundant and is left off. For unusual SQL casings
621/// (`POSTS` -> `Posts` -> derive computes `"posts"` not `"POSTS"`),
622/// the attribute is emitted and the M3.1 derive picks it up to
623/// override the default. See `umbral-macros/src/lib.rs` for the
624/// attribute parser.
625fn render_one_struct(table: &IntrospectedTable) -> String {
626 let mut out = String::new();
627 // `sqlx::FromRow` is required because the `Model` trait bounds it
628 // as a supertrait (see `crates/umbral-core/src/orm/model.rs`).
629 // Without it, `#[derive(Model)]` emits an `impl Model` whose
630 // sqlx::FromRow supertrait isn't satisfied and the generated file
631 // fails to compile.
632 out.push_str("#[derive(Debug, Clone, sqlx::FromRow, Model)]\n");
633 if to_snake_case(&table.name) != table.table {
634 out.push_str(&format!("#[umbral(table = \"{}\")]\n", table.table));
635 }
636 out.push_str(&format!("pub struct {} {{\n", table.name));
637 for column in &table.columns {
638 out.push_str(&format!(
639 " pub {}: {},\n",
640 column.name,
641 render_field_type(column.ty, column.nullable),
642 ));
643 }
644 out.push_str("}\n");
645 out
646}
647
648/// Map `(SqlType, nullable)` to the Rust type string the derive macro's
649/// `classify_field_type` accepts. Mirrors the table in
650/// `umbral-macros/src/lib.rs` (see `FieldKind` for the full catalogue).
651fn render_field_type(ty: SqlType, nullable: bool) -> String {
652 let base = match ty {
653 SqlType::SmallInt => "i16".to_string(),
654 SqlType::Integer => "i32".to_string(),
655 SqlType::BigInt => "i64".to_string(),
656 SqlType::Real => "f32".to_string(),
657 SqlType::Double => "f64".to_string(),
658 SqlType::Boolean => "bool".to_string(),
659 SqlType::Text => "String".to_string(),
660 SqlType::Date => "chrono::NaiveDate".to_string(),
661 SqlType::Time => "chrono::NaiveTime".to_string(),
662 SqlType::Timestamptz => "chrono::DateTime<chrono::Utc>".to_string(),
663 SqlType::Uuid => "uuid::Uuid".to_string(),
664 SqlType::Json => "serde_json::Value".to_string(),
665 // Recurse through the element's SqlType. Wrapping in `Vec<...>`
666 // matches the derive's catalogue: a `Vec<i64>` declares an
667 // `Array(ArrayElement::BigInt)` field.
668 SqlType::Array(elem) => format!("Vec<{}>", render_field_type(elem.to_sql_type(), false)),
669 // Phase 4.4: Postgres network address types. Both `Inet` and
670 // `Cidr` round-trip through `ipnetwork::IpNetwork`; `MacAddr`
671 // uses the `mac_address` crate.
672 SqlType::Inet => "ipnetwork::IpNetwork".to_string(),
673 SqlType::Cidr => "ipnetwork::IpNetwork".to_string(),
674 SqlType::MacAddr => "mac_address::MacAddress".to_string(),
675 // gaps2 #70: text-backed Postgres types surface as `String`.
676 // inspectdb can't recover which `#[umbral(...)]` attr produced
677 // the column (the attr lives only in the source model, not the
678 // DB), so the generated model is a plain `String`; the user
679 // re-adds `#[umbral(xml)]` / `#[umbral(ltree)]` / `#[umbral(bit)]`
680 // if they want the native type back on a re-migrate.
681 SqlType::Xml => "String".to_string(),
682 SqlType::Ltree => "String".to_string(),
683 SqlType::Bit => "String".to_string(),
684 SqlType::FullText => "umbral::orm::TsVector".to_string(),
685 // ForeignKey inspectdb renders as i64 for now; the FK relationship
686 // introspection that would emit ForeignKey<T> is deferred.
687 SqlType::ForeignKey => "i64".to_string(),
688 // BLOB / BYTEA columns surface as Vec<u8> in user code.
689 SqlType::Bytes => "Vec<u8>".to_string(),
690 // BUG-10: NUMERIC introspection renders as
691 // `rust_decimal::Decimal`. inspectdb reads the column type
692 // from Postgres' `information_schema`; the resulting
693 // model imports use this exact path.
694 SqlType::Decimal => "rust_decimal::Decimal".to_string(),
695 };
696 let base = base.as_str();
697 if nullable {
698 format!("Option<{base}>")
699 } else {
700 base.to_string()
701 }
702}
703
704/// Render the introspected schema as a [`MigrationFile`] suitable for
705/// writing to `migrations/<INSPECTED_PLUGIN_NAME>/0001_initial.json`.
706/// One `CreateTable` per introspected table; `snapshot_after` captures
707/// the imported state so subsequent `make_in` runs diff against it.
708///
709/// Filled in by subagent B.
710pub fn render_initial_migration(schema: &IntrospectedSchema) -> MigrationFile {
711 let mut models: Vec<ModelMeta> = schema
712 .tables
713 .iter()
714 .map(|t| ModelMeta {
715 name: t.name.clone(),
716 table: t.table.clone(),
717 fields: t.columns.iter().map(Column::from).collect(),
718 display: t.name.clone(),
719 icon: "database".to_string(),
720 database: None,
721 singleton: false,
722 unique_together: Vec::new(),
723 indexes: Vec::new(),
724 ordering: Vec::new(),
725 m2m_relations: Vec::new(),
726 soft_delete: false,
727 // inspectdb has no plugin attribute to read; default to "app".
728 app_label: "app".to_string(),
729 })
730 .collect();
731 models.sort_by(|a, b| a.name.cmp(&b.name));
732
733 let operations = schema
734 .tables
735 .iter()
736 .map(|t| Operation::CreateTable {
737 table: t.table.clone(),
738 columns: t.columns.iter().map(Column::from).collect(),
739 unique_together: Vec::new(),
740 indexes: Vec::new(),
741 })
742 .collect();
743
744 MigrationFile {
745 id: INITIAL_MIGRATION_ID.to_string(),
746 plugin: INSPECTED_PLUGIN_NAME.to_string(),
747 depends_on: Vec::new(),
748 operations,
749 snapshot_after: Snapshot { models },
750 }
751}
752
753/// Write `models.rs` and the initial migration to `output`. Creates
754/// `output/` and `output/migrations/<INSPECTED_PLUGIN_NAME>/` as
755/// needed. Returns the report carrying the table / column counts and
756/// the paths.
757///
758/// The migration is pretty-printed so the file diffs cleanly when a
759/// later `makemigrations` writes the next migration alongside.
760pub async fn write_outputs(
761 output: &Path,
762 models_src: &str,
763 migration: &MigrationFile,
764) -> Result<InspectReport, InspectError> {
765 std::fs::create_dir_all(output)?;
766
767 let models_path = output.join("models.rs");
768 std::fs::write(&models_path, models_src)?;
769
770 let plugin_dir = output.join("migrations").join(INSPECTED_PLUGIN_NAME);
771 std::fs::create_dir_all(&plugin_dir)?;
772
773 let migration_path = plugin_dir.join(format!("{}.json", migration.id));
774 let json = serde_json::to_string_pretty(migration)?;
775 std::fs::write(&migration_path, json)?;
776
777 let (tables, columns) =
778 migration
779 .operations
780 .iter()
781 .fold((0usize, 0usize), |(t, c), op| match op {
782 Operation::CreateTable { columns, .. } => (t + 1, c + columns.len()),
783 Operation::CreateM2MTable { .. } => (t + 1, c + 2),
784 Operation::DropTable { .. }
785 | Operation::DropM2MTable { .. }
786 | Operation::AddColumn { .. }
787 | Operation::DropColumn { .. }
788 | Operation::AlterColumn { .. }
789 | Operation::RenameTable { .. }
790 | Operation::RenameColumn { .. }
791 | Operation::RunSql { .. } => (t, c),
792 });
793
794 Ok(InspectReport {
795 tables,
796 columns,
797 models_path,
798 migration_path,
799 })
800}
801
802// =========================================================================
803// Internal helpers.
804// =========================================================================
805
806impl From<&IntrospectedColumn> for Column {
807 fn from(c: &IntrospectedColumn) -> Self {
808 Self {
809 name: c.name.clone(),
810 ty: c.ty,
811 primary_key: c.primary_key,
812 nullable: c.nullable,
813 fk_target: None,
814 noform: false,
815 // inspectdb introspects no FK yet (`fk_target: None`), so a
816 // real DB constraint maps to the default `true`.
817 db_constraint: true,
818 noedit: false,
819 is_string_repr: false,
820 max_length: 0,
821 choices: Vec::new(),
822 choice_labels: Vec::new(),
823 default: String::new(),
824 is_multichoice: false,
825 // inspectdb does not introspect UNIQUE constraints yet
826 // (gap #65 ships the declare-side first; inspect-side
827 // lands when there's a real porting case that needs it).
828 unique: false,
829 on_delete: crate::orm::FkAction::NoAction,
830 on_update: crate::orm::FkAction::NoAction,
831 index: false,
832 auto_now_add: false,
833 auto_now: false,
834 help: String::new(),
835 example: String::new(),
836 widget: None,
837 supported_backends: Vec::new(),
838 min: None,
839 max: None,
840 text_format: ::core::option::Option::None,
841 slug_from: ::core::option::Option::None,
842 }
843 }
844}
845
846#[cfg(test)]
847mod tests {
848 use super::*;
849
850 fn col(name: &str, ty: SqlType, primary_key: bool, nullable: bool) -> IntrospectedColumn {
851 IntrospectedColumn {
852 name: name.to_string(),
853 ty,
854 primary_key,
855 nullable,
856 }
857 }
858
859 #[test]
860 fn empty_schema_renders_header_only() {
861 let out = render_models(&IntrospectedSchema { tables: Vec::new() });
862 assert_eq!(out, HEADER);
863 }
864
865 #[test]
866 fn snake_case_table_skips_attribute_when_derive_round_trips() {
867 let schema = IntrospectedSchema {
868 tables: vec![IntrospectedTable {
869 table: "blog_post".to_string(),
870 name: "BlogPost".to_string(),
871 columns: vec![
872 col("id", SqlType::BigInt, true, false),
873 col("title", SqlType::Text, false, false),
874 ],
875 }],
876 };
877 let out = render_models(&schema);
878 // `BlogPost` snake_cases to `blog_post` via the derive, so the
879 // attribute is redundant and is left off. This keeps the
880 // generated file compatible with the M3 derive, which doesn't
881 // yet recognise `#[umbral(...)]` attributes.
882 assert!(!out.contains("#[umbral(table"));
883 assert!(out.contains("pub struct BlogPost {"));
884 assert!(out.contains("pub id: i64,"));
885 assert!(out.contains("pub title: String,"));
886 }
887
888 #[test]
889 fn lowercase_single_word_table_skips_attribute() {
890 // `post` -> `Post` -> derive snake_cases to `"post"`, matches
891 // the source table verbatim, so the attribute is left off.
892 let schema = IntrospectedSchema {
893 tables: vec![IntrospectedTable {
894 table: "post".to_string(),
895 name: "Post".to_string(),
896 columns: vec![col("id", SqlType::BigInt, true, false)],
897 }],
898 };
899 let out = render_models(&schema);
900 assert!(!out.contains("#[umbral(table"));
901 assert!(out.contains("pub struct Post {"));
902 }
903
904 #[test]
905 fn non_round_tripping_table_name_keeps_attribute() {
906 // SQL tables with names the derive's snake_case won't reach
907 // (e.g. uppercase, runs of capitals, leading digits) need the
908 // explicit attribute. This case is rare in real ports but
909 // the renderer should still cover it for the derive's eventual
910 // attribute-support landing.
911 let schema = IntrospectedSchema {
912 tables: vec![IntrospectedTable {
913 table: "POSTS".to_string(),
914 name: "Posts".to_string(),
915 columns: vec![col("id", SqlType::BigInt, true, false)],
916 }],
917 };
918 let out = render_models(&schema);
919 assert!(out.contains("#[umbral(table = \"POSTS\")]"));
920 }
921
922 #[test]
923 fn nullable_column_wraps_in_option() {
924 let schema = IntrospectedSchema {
925 tables: vec![IntrospectedTable {
926 table: "post".to_string(),
927 name: "Post".to_string(),
928 columns: vec![
929 col("id", SqlType::BigInt, true, false),
930 col("published_at", SqlType::Timestamptz, false, true),
931 ],
932 }],
933 };
934 let out = render_models(&schema);
935 assert!(out.contains("pub published_at: Option<chrono::DateTime<chrono::Utc>>,"));
936 }
937
938 #[test]
939 fn type_catalogue_renders_each_sql_type() {
940 let schema = IntrospectedSchema {
941 tables: vec![IntrospectedTable {
942 table: "kitchen_sink".to_string(),
943 name: "KitchenSink".to_string(),
944 columns: vec![
945 col("id", SqlType::BigInt, true, false),
946 col("small", SqlType::SmallInt, false, false),
947 col("medium", SqlType::Integer, false, false),
948 col("real_v", SqlType::Real, false, false),
949 col("double_v", SqlType::Double, false, false),
950 col("flag", SqlType::Boolean, false, false),
951 col("note", SqlType::Text, false, false),
952 col("day", SqlType::Date, false, false),
953 col("clock", SqlType::Time, false, false),
954 col("at", SqlType::Timestamptz, false, false),
955 col("uid", SqlType::Uuid, false, false),
956 ],
957 }],
958 };
959 let out = render_models(&schema);
960 for expected in [
961 "pub id: i64,",
962 "pub small: i16,",
963 "pub medium: i32,",
964 "pub real_v: f32,",
965 "pub double_v: f64,",
966 "pub flag: bool,",
967 "pub note: String,",
968 "pub day: chrono::NaiveDate,",
969 "pub clock: chrono::NaiveTime,",
970 "pub at: chrono::DateTime<chrono::Utc>,",
971 "pub uid: uuid::Uuid,",
972 ] {
973 assert!(out.contains(expected), "missing field render: {expected}");
974 }
975 }
976
977 #[test]
978 fn structs_are_sorted_by_name() {
979 let schema = IntrospectedSchema {
980 tables: vec![
981 IntrospectedTable {
982 table: "zebra".to_string(),
983 name: "Zebra".to_string(),
984 columns: vec![col("id", SqlType::BigInt, true, false)],
985 },
986 IntrospectedTable {
987 table: "antelope".to_string(),
988 name: "Antelope".to_string(),
989 columns: vec![col("id", SqlType::BigInt, true, false)],
990 },
991 ],
992 };
993 let out = render_models(&schema);
994 let antelope_at = out.find("struct Antelope").expect("Antelope rendered");
995 let zebra_at = out.find("struct Zebra").expect("Zebra rendered");
996 assert!(antelope_at < zebra_at);
997 }
998
999 #[test]
1000 fn header_carries_the_regen_warning_and_facade_import() {
1001 let out = render_models(&IntrospectedSchema { tables: Vec::new() });
1002 assert!(out.contains("Generated by `umbral inspectdb`"));
1003 assert!(out.contains("edits made by hand will be lost"));
1004 assert!(out.contains("use umbral::prelude::*;"));
1005 }
1006
1007 // --------------------------------------------------------------- //
1008 // Postgres type-mapping coverage (Phase 3). //
1009 // --------------------------------------------------------------- //
1010
1011 /// Every variant of the M5 SqlType catalogue has a mapping from
1012 /// the canonical Postgres `information_schema.columns.data_type`
1013 /// value back to the variant. Lockstep with
1014 /// `crate::backend::PostgresBackend::map_type` — if a SqlType
1015 /// variant lands, both `map_type` (outbound) and `map_postgres_type`
1016 /// (inbound) need an arm.
1017 #[test]
1018 fn map_postgres_type_covers_the_full_catalogue() {
1019 assert_eq!(map_postgres_type("smallint"), Some(SqlType::SmallInt));
1020 assert_eq!(map_postgres_type("integer"), Some(SqlType::Integer));
1021 assert_eq!(map_postgres_type("bigint"), Some(SqlType::BigInt));
1022 assert_eq!(map_postgres_type("real"), Some(SqlType::Real));
1023 assert_eq!(map_postgres_type("double precision"), Some(SqlType::Double));
1024 assert_eq!(map_postgres_type("boolean"), Some(SqlType::Boolean));
1025 assert_eq!(map_postgres_type("text"), Some(SqlType::Text));
1026 assert_eq!(
1027 map_postgres_type("character varying"),
1028 Some(SqlType::Text),
1029 "VARCHAR maps to Text",
1030 );
1031 assert_eq!(
1032 map_postgres_type("character"),
1033 Some(SqlType::Text),
1034 "CHAR maps to Text",
1035 );
1036 assert_eq!(map_postgres_type("date"), Some(SqlType::Date));
1037 assert_eq!(
1038 map_postgres_type("time without time zone"),
1039 Some(SqlType::Time),
1040 );
1041 assert_eq!(
1042 map_postgres_type("time with time zone"),
1043 Some(SqlType::Time)
1044 );
1045 assert_eq!(
1046 map_postgres_type("timestamp without time zone"),
1047 Some(SqlType::Timestamptz),
1048 );
1049 assert_eq!(
1050 map_postgres_type("timestamp with time zone"),
1051 Some(SqlType::Timestamptz),
1052 );
1053 assert_eq!(map_postgres_type("uuid"), Some(SqlType::Uuid));
1054 // Phase 4: both `json` and `jsonb` round-trip to the portable
1055 // `SqlType::Json` (DDL renders as `jsonb` on Postgres, TEXT on
1056 // SQLite).
1057 assert_eq!(map_postgres_type("json"), Some(SqlType::Json));
1058 assert_eq!(map_postgres_type("jsonb"), Some(SqlType::Json));
1059 // Phase 4.4: Postgres network address types.
1060 assert_eq!(map_postgres_type("inet"), Some(SqlType::Inet));
1061 assert_eq!(map_postgres_type("cidr"), Some(SqlType::Cidr));
1062 assert_eq!(map_postgres_type("macaddr"), Some(SqlType::MacAddr));
1063 // BLOB / BYTEA — Vec<u8> in Rust.
1064 assert_eq!(map_postgres_type("bytea"), Some(SqlType::Bytes));
1065 }
1066
1067 /// Postgres-specific types umbral doesn't model yet surface as
1068 /// `None` so the caller produces `UnsupportedColumnType` with the
1069 /// raw type string preserved. The catalogue lookups most likely to
1070 /// bite a port: numeric, bytea, arrays, network types. The
1071 /// user fixes by hand or waits for the catalogue to grow.
1072 ///
1073 /// Note `json`/`jsonb` are NOT on this list — Phase 4's `Json`
1074 /// SqlType variant maps both back to `SqlType::Json`. Likewise
1075 /// `inet`/`cidr`/`macaddr` left this list when Phase 4.4 added
1076 /// the matching SqlType variants. The companion arms in
1077 /// `map_postgres_type` are covered by
1078 /// `map_postgres_type_covers_the_full_catalogue` above.
1079 #[test]
1080 fn map_postgres_type_returns_none_for_postgres_only_types() {
1081 assert_eq!(map_postgres_type("numeric"), None);
1082 // `bytea` USED to be off-catalogue and returned None; once
1083 // SqlType::Bytes shipped, `bytea` started routing to it.
1084 // Asserted in the positive `map_postgres_type_covers_the_full_catalogue`
1085 // test instead.
1086 assert_eq!(map_postgres_type("ARRAY"), None);
1087 }
1088
1089 /// The mapping is case-insensitive on the input but matches against
1090 /// the canonical lowercase form information_schema reports. Whether
1091 /// the operator's DB returns `INTEGER` (uppercase, from a quoted
1092 /// type) or `integer` shouldn't matter.
1093 #[test]
1094 fn map_postgres_type_is_case_insensitive_on_input() {
1095 assert_eq!(map_postgres_type("INTEGER"), Some(SqlType::Integer));
1096 assert_eq!(map_postgres_type("Bigint"), Some(SqlType::BigInt));
1097 assert_eq!(map_postgres_type("UUID"), Some(SqlType::Uuid));
1098 }
1099
1100 /// Surrounding whitespace doesn't break the lookup. Trimming
1101 /// matches `map_sqlite_type`'s `trim()`; both functions parse
1102 /// values straight from a sqlx row and the trim is a cheap
1103 /// safety net.
1104 #[test]
1105 fn map_postgres_type_trims_whitespace() {
1106 assert_eq!(map_postgres_type(" bigint "), Some(SqlType::BigInt));
1107 }
1108}