Skip to main content

rivet/types/
mod.rs

1//! Rivet's internal type system.
2//!
3//! See `rivet_roadmap.md` §Epic 14 (Warehouse Load Layer). North Star —
4//! *"No silent type degradation"* — is enforced architecturally by
5//! routing every source-column type through the pipeline:
6//!
7//! ```text
8//! Source Native Type
9//!     ↓
10//! SourceColumn  ← what the driver knows about the column
11//!     ↓
12//! RivetType     ← canonical, vendor-independent type
13//!     ↓
14//! TypePolicy    ← strict / lossy / unsupported decisions  (Chunk 4)
15//!     ↓
16//! Arrow DataType + Field metadata  ← physical export type
17//! ```
18//!
//! This module owns the first three boxes. The fifth (Arrow) is built by
//! [`mapping::build_arrow_field`]; the fourth (TypePolicy) lands in Chunk 4
//! of the type-safety milestones (see roadmap §18).
22//!
23//! ## Layer
24//!
25//! Layer-classification (ADR-0003): this module is **planning-layer** — it
26//! only describes / classifies types. It must not perform I/O, log
27//! metrics, or hold any pipeline state. Vendor mappers live in
28//! `crate::source::*` and call into this module.
29
30mod cursor;
31pub mod decimal;
32mod fidelity;
33mod mapping;
34mod override_type;
35pub mod policy;
36mod rivet_type;
37mod source_column;
38pub mod target;
39
40pub use cursor::CursorState;
41pub use fidelity::TypeFidelity;
42// Public surface for contract/integration tests; not referenced from the binary.
43#[allow(unused_imports)]
44pub use mapping::{TypeMapping, build_arrow_field, derive_fidelity, rivet_type_to_arrow};
45pub use override_type::parse_type_str;
46pub use rivet_type::{RivetType, TimeUnit};
47pub use source_column::SourceColumn;
48// ColumnOverride is the planned public API for column type overrides (Chunk 6).
49#[allow(unused_imports)]
50pub use source_column::ColumnOverride;
51
52/// Per-export column type overrides: column name → declared [`RivetType`].
53///
54/// Built at plan time from the `columns:` map in `rivet.yaml` (roadmap §8).
55/// Passed to [`crate::source::Source::export`] so drivers can use the
56/// declared precision/scale instead of autodetected (often unavailable) metadata.
57pub type ColumnOverrides = std::collections::HashMap<String, RivetType>;
58
59/// The override precedence shared by every source engine: a `columns:` override
60/// wins; otherwise fall back to the engine's autodetected type. Keeping it in
61/// one place is why PostgreSQL and MySQL resolution can't drift on precedence —
62/// each driver supplies only its own `autodetect` closure.
63pub fn resolve_or(
64    overrides: &ColumnOverrides,
65    column: &str,
66    autodetect: impl FnOnce() -> RivetType,
67) -> RivetType {
68    overrides.get(column).cloned().unwrap_or_else(autodetect)
69}
70
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::mapping::{META_FIDELITY, META_LOGICAL_TYPE, META_NATIVE_TYPE};
    use arrow::datatypes::DataType;

    /// A declared override takes precedence and short-circuits autodetection;
    /// a column without an override falls through to the autodetect closure.
    #[test]
    fn resolve_or_prefers_override_then_autodetect() {
        let declared = RivetType::Decimal {
            precision: 18,
            scale: 2,
        };
        let overrides = ColumnOverrides::from([("amount".to_string(), declared.clone())]);

        // Overridden column: the closure must never be invoked.
        let resolved = resolve_or(&overrides, "amount", || {
            panic!("autodetect must not run when overridden")
        });
        assert_eq!(resolved, declared);

        // Column without an override: the closure supplies the type.
        let detected = resolve_or(&overrides, "other", || RivetType::Int64);
        assert_eq!(detected, RivetType::Int64);
    }

    /// End-to-end smoke test over a representative PostgreSQL `payments`
    /// table. Running each column through `SourceColumn → RivetType → Arrow
    /// Field` must yield the schema listed in the roadmap's §20 "Definition of
    /// Done":
    ///
    /// ```text
    /// id          bigint          int64            Int64                  exact
    /// amount      numeric(18,2)   decimal(18,2)    Decimal128(18,2)       exact
    /// created_at  timestamptz     timestamp_tz     Timestamp(us, UTC)     exact
    /// payload     jsonb           json             Utf8 + metadata        logical_string
    /// ```
    #[test]
    fn end_to_end_payments_schema_matches_definition_of_done() {
        let id_col = SourceColumn::simple("id", "bigint", false);
        let amount_col = SourceColumn::decimal("amount", "numeric", false, 18, 2);
        let created_at_col = SourceColumn::simple("created_at", "timestamptz", false);
        let payload_col = SourceColumn::simple("payload", "jsonb", true);

        let mappings: Vec<TypeMapping> = vec![
            (id_col, RivetType::Int64),
            (
                amount_col,
                RivetType::Decimal {
                    precision: 18,
                    scale: 2,
                },
            ),
            (
                created_at_col,
                RivetType::Timestamp {
                    unit: TimeUnit::Microsecond,
                    timezone: Some("UTC".into()),
                },
            ),
            (payload_col, RivetType::Json),
        ]
        .into_iter()
        .map(|(column, rivet_type)| TypeMapping::from_source(&column, rivet_type))
        .collect();

        let (id, amount, created_at, payload) =
            (&mappings[0], &mappings[1], &mappings[2], &mappings[3]);

        // Fidelity column of the Definition-of-Done table, row by row.
        let expected_fidelity = vec![
            TypeFidelity::Exact,
            TypeFidelity::Exact,
            TypeFidelity::Exact,
            TypeFidelity::LogicalString,
        ];
        for (mapping, expected) in mappings.iter().zip(expected_fidelity) {
            assert_eq!(mapping.fidelity, expected);
        }

        // Physical Arrow types must match the roadmap exactly; in particular
        // the decimal column must not degrade to Utf8.
        assert_eq!(id.arrow_type, Some(DataType::Int64));
        assert_eq!(amount.arrow_type, Some(DataType::Decimal128(18, 2)));
        assert!(matches!(
            &created_at.arrow_type,
            Some(DataType::Timestamp(_, Some(_)))
        ));
        assert_eq!(payload.arrow_type, Some(DataType::Utf8));

        // Metadata attached to the Arrow field survives the whole pipeline.
        let amount_field = build_arrow_field(amount).expect("amount");
        let amount_meta = amount_field.metadata();
        assert_eq!(
            amount_meta.get(META_NATIVE_TYPE).map(String::as_str),
            Some("numeric")
        );
        assert_eq!(
            amount_meta.get(META_FIDELITY).map(String::as_str),
            Some("exact")
        );

        let payload_field = build_arrow_field(payload).expect("payload");
        let payload_meta = payload_field.metadata();
        assert_eq!(
            payload_meta.get(META_LOGICAL_TYPE).map(String::as_str),
            Some("json")
        );
    }

    /// Exercises the `rivet_type_to_arrow` / `derive_fidelity` re-exports so
    /// they stay live for the `tests/type_roundtrip` contract tests and
    /// downstream tooling.
    #[test]
    fn mapping_helpers_reexported_for_contract_tests() {
        let decimal = RivetType::Decimal {
            precision: 18,
            scale: 2,
        };

        let arrow = rivet_type_to_arrow(&decimal);
        assert!(matches!(arrow, Some(DataType::Decimal128(18, 2))));

        let fidelity = derive_fidelity(&decimal);
        assert_eq!(fidelity, TypeFidelity::Exact);
    }
}