rivet/types/rivet_type.rs
//! Rivet's canonical internal type system.
//!
//! See `rivet_roadmap.md`: Epic 14 (type safety), §5 ("Type Mapping
//! Pipeline") and §6 ("Internal Type System"). Every database driver maps
//! its native column metadata into a [`RivetType`] *first*; a single
//! function then maps [`RivetType`] to `arrow::DataType`. This is the
//! architectural fix for the status-quo `DB type → Arrow type` shortcut,
//! which silently degrades types to `Utf8` (roadmap §5, "incorrect
//! pipeline").
9//!
//! Three invariants are enforced by this enum:
//!
//! 1. **No silent precision loss** — `Decimal { precision, scale }` carries
//!    the declared precision and scale; a `Decimal` without a known
//!    precision cannot be constructed. Unbounded numeric columns therefore
//!    *must* either map to [`RivetType::Unsupported`] or be resolved by
//!    `TypePolicy::decimal.unbounded` (Chunk 4).
//! 2. **No silent timezone loss** — `Timestamp { unit, timezone }` makes the
//!    timezone explicit; `timezone: None` means "no timezone semantics" and
//!    is *not* the same as `Some("UTC")`.
//! 3. **No silent fallback to string** — anything Rivet can't safely map is
//!    represented as [`RivetType::Unsupported`] with a reason string, so
//!    the type-policy layer can decide whether to fail, warn, or fall back.
23//!
//! `serde::Serialize` is derived so the type-report CLI (Chunk 5) can emit a
//! stable JSON shape; `serde::Deserialize` is derived alongside it.
26
27use serde::{Deserialize, Serialize};
28
29/// Time-resolution unit for [`RivetType::Time`] / [`RivetType::Timestamp`].
30///
31/// Mirrors `arrow::datatypes::TimeUnit` but lives in the Rivet type system
32/// so we don't leak Arrow as a public API surface and so the type-report
33/// CLI can serialize the value without depending on Arrow's types.
34// Only `Microsecond` is produced by current drivers; the remaining variants
35// are live once ADBC / other drivers are added (roadmap §4).
36#[allow(dead_code)]
37#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
38#[serde(rename_all = "snake_case")]
39pub enum TimeUnit {
40 Second,
41 Millisecond,
42 Microsecond,
43 Nanosecond,
44}
45
46impl TimeUnit {
47 /// Stable lowercase string label for persistence and reports.
48 #[allow(dead_code)]
49 pub fn label(self) -> &'static str {
50 match self {
51 TimeUnit::Second => "second",
52 TimeUnit::Millisecond => "millisecond",
53 TimeUnit::Microsecond => "microsecond",
54 TimeUnit::Nanosecond => "nanosecond",
55 }
56 }
57}
58
59/// Canonical Rivet type. Every source-driver column maps into exactly one
60/// of these variants before we ever look at `arrow::DataType`.
61///
62/// Variants are kept narrow on purpose: adding a new variant is a deliberate
63/// architectural choice (it usually means "we figured out how to safely
64/// export a new shape of data"). Anything outside this enum becomes
65/// [`RivetType::Unsupported`] until the type system gains first-class
66/// support for it.
67// `UInt64` and `Decimal` are live in MySQL/PG mappers once column overrides
68// (Chunk 6) and exact decimal (Milestone 2) land; `Second`/`Millisecond`/
69// `Nanosecond` TimeUnit variants serve future drivers.
70#[allow(dead_code)]
71#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
72#[serde(tag = "kind", rename_all = "snake_case")]
73pub enum RivetType {
74 /// Boolean.
75 Bool,
76
77 /// Signed 16-bit integer (PostgreSQL `int2`, MySQL `smallint`).
78 Int16,
79 /// Signed 32-bit integer (PostgreSQL `int4`, MySQL `int`).
80 Int32,
81 /// Signed 64-bit integer (PostgreSQL `int8`, MySQL `bigint signed`).
82 Int64,
83 /// Unsigned 64-bit integer (MySQL `bigint unsigned`).
84 /// Target compatibility check required (BigQuery has no unsigned 64).
85 UInt64,
86
87 /// IEEE-754 32-bit float. Marked `exact-ish` in the roadmap mappings
88 /// because float→float is bit-exact but float→target may not round-trip.
89 Float32,
90 /// IEEE-754 64-bit float.
91 Float64,
92
93 /// Fixed-precision decimal. Precision/scale are *required* — the whole
94 /// point of having a separate variant from `Float64` is that we never
95 /// route money/decimal through floats. Mapped to Arrow Decimal128 when
96 /// `precision <= 38`, Decimal256 otherwise (roadmap §12).
97 Decimal {
98 /// Total number of significant digits.
99 precision: u8,
100 /// Digits to the right of the decimal point. Signed because
101 /// PostgreSQL `numeric` allows negative scale.
102 scale: i8,
103 },
104
105 /// Calendar date (no time, no timezone).
106 Date,
107
108 /// Time-of-day with the given resolution.
109 Time { unit: TimeUnit },
110
111 /// Timestamp with explicit timezone semantics. `timezone: None` means
112 /// "no timezone" (PostgreSQL `timestamp`, MySQL `datetime`);
113 /// `timezone: Some("UTC")` means timezone-normalized to UTC
114 /// (PostgreSQL `timestamptz`, MySQL `timestamp` with session tz=+00:00).
115 Timestamp {
116 unit: TimeUnit,
117 timezone: Option<String>,
118 },
119
120 /// Variable-length string (PostgreSQL `varchar`, `text`, `bpchar`,
121 /// `name`; MySQL `varchar`, `text`).
122 String,
123 /// Long-form text. Currently treated identically to `String` on the
124 /// Arrow layer (both → `Utf8`), but kept as a separate variant so the
125 /// type-report can distinguish "the source declared this as text" from
126 /// "the source declared this as fixed-length char".
127 Text,
128 /// Variable-length binary (PostgreSQL `bytea`, MySQL `varbinary`/`blob`).
129 Binary,
130
131 /// JSON / JSONB. Stored as `Utf8 + metadata logical=json` until proper
132 /// struct inference is implemented (roadmap §14).
133 Json,
134 /// UUID. Stored as `Utf8 + metadata logical=uuid` by default; can be
135 /// switched to FixedSizeBinary(16) by policy later (roadmap §14).
136 Uuid,
137
138 // ── M6: Complex Types ─────────────────────────────────────────────────
139 /// Database enum type (PostgreSQL `ENUM`, MySQL `ENUM`/`SET`).
140 /// Stored as `Utf8 + metadata logical=enum` (roadmap §15).
141 Enum,
142
143 /// Time interval (PostgreSQL `interval`).
144 /// Stored as Arrow `Utf8` (ISO 8601 duration string, e.g. `"P1Y2M3D"`).
145 /// `Interval(MonthDayNano)` cannot be written to Parquet, so lossless
146 /// text serialisation is used instead (roadmap §15).
147 Interval,
148
149 /// One-dimensional array of a scalar Rivet type.
150 /// PostgreSQL `int8[]`, `text[]`, `bool[]`, etc.
151 /// Stored as Arrow `List(inner_type)` (roadmap §15).
152 List { inner: Box<RivetType> },
153
154 /// The driver knows about the type but Rivet does not have a safe
155 /// mapping for it (e.g. PostgreSQL `geometry`, `hstore`).
156 /// Carries enough context for an actionable error message in the
157 /// type-report and policy layer.
158 Unsupported { native_type: String, reason: String },
159}
160
161impl RivetType {
162 /// Stable lowercase string label for persistence and human-readable
163 /// reports. Round-trippable with the JSON shape of the variant when
164 /// applicable (e.g. `decimal(18,2)`, `timestamp_tz(microsecond,UTC)`).
165 /// Used by the type-report CLI (Chunk 5).
166 #[allow(dead_code)]
167 pub fn label(&self) -> String {
168 match self {
169 RivetType::Bool => "bool".into(),
170 RivetType::Int16 => "int16".into(),
171 RivetType::Int32 => "int32".into(),
172 RivetType::Int64 => "int64".into(),
173 RivetType::UInt64 => "uint64".into(),
174 RivetType::Float32 => "float32".into(),
175 RivetType::Float64 => "float64".into(),
176 RivetType::Decimal { precision, scale } => format!("decimal({precision},{scale})"),
177 RivetType::Date => "date".into(),
178 RivetType::Time { unit } => format!("time({})", unit.label()),
179 RivetType::Timestamp {
180 unit,
181 timezone: None,
182 } => format!("timestamp({})", unit.label()),
183 RivetType::Timestamp {
184 unit,
185 timezone: Some(tz),
186 } => format!("timestamp_tz({},{tz})", unit.label()),
187 RivetType::String => "string".into(),
188 RivetType::Text => "text".into(),
189 RivetType::Binary => "binary".into(),
190 RivetType::Json => "json".into(),
191 RivetType::Uuid => "uuid".into(),
192 RivetType::Enum => "enum".into(),
193 RivetType::Interval => "interval".into(),
194 RivetType::List { inner } => format!("list<{}>", inner.label()),
195 RivetType::Unsupported { native_type, .. } => format!("unsupported({native_type})"),
196 }
197 }
198
199 /// True for the `Unsupported` variant — convenience for the strict-mode
200 /// gate so callers don't have to `matches!()` everywhere.
201 #[allow(dead_code)]
202 pub fn is_unsupported(&self) -> bool {
203 match self {
204 RivetType::Unsupported { .. } => true,
205 // A list of an unsupported element is itself unsupported — the run
206 // can't build the field. Keep consistent with `derive_fidelity`.
207 RivetType::List { inner } => inner.is_unsupported(),
208 _ => false,
209 }
210 }
211}
212
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `Unsupported` value for a type the enum documents as having
    /// no safe mapping.
    fn unsupported_geometry() -> RivetType {
        RivetType::Unsupported {
            native_type: "geometry".into(),
            reason: "no safe Arrow mapping for geometry".into(),
        }
    }

    /// The decimal label must expose both precision and scale.
    #[test]
    fn label_includes_decimal_precision_and_scale() {
        assert_eq!(
            RivetType::Decimal {
                precision: 18,
                scale: 2,
            }
            .label(),
            "decimal(18,2)"
        );
    }

    /// A naive timestamp and a UTC timestamp differ in both label and value.
    #[test]
    fn label_distinguishes_timestamp_with_and_without_timezone() {
        let naive = RivetType::Timestamp {
            unit: TimeUnit::Microsecond,
            timezone: None,
        };
        let tz = RivetType::Timestamp {
            unit: TimeUnit::Microsecond,
            timezone: Some("UTC".into()),
        };
        assert_eq!(naive.label(), "timestamp(microsecond)");
        assert_eq!(tz.label(), "timestamp_tz(microsecond,UTC)");
        assert_ne!(naive, tz, "tz=None and tz=Some(\"UTC\") must NOT be equal");
    }

    /// `Unsupported` reports itself as unsupported and names the native type.
    // Uses `geometry` rather than `interval`: `Interval` is a first-class
    // variant, so it no longer makes sense as an unsupported example.
    #[test]
    fn unsupported_carries_actionable_context() {
        let t = unsupported_geometry();
        assert!(t.is_unsupported());
        assert_eq!(t.label(), "unsupported(geometry)");
    }

    /// First-class variants, including `Interval`, are not unsupported.
    #[test]
    fn supported_variants_are_not_unsupported() {
        assert!(!RivetType::Bool.is_unsupported());
        assert!(!RivetType::Interval.is_unsupported());
        assert!(!RivetType::Decimal {
            precision: 10,
            scale: 0,
        }
        .is_unsupported());
    }

    /// List labels wrap the element label, at any nesting depth.
    #[test]
    fn list_label_wraps_inner_label() {
        let flat = RivetType::List {
            inner: Box::new(RivetType::Int64),
        };
        assert_eq!(flat.label(), "list<int64>");

        let nested = RivetType::List {
            inner: Box::new(RivetType::List {
                inner: Box::new(RivetType::Text),
            }),
        };
        assert_eq!(nested.label(), "list<list<text>>");
    }

    /// A list is unsupported exactly when its innermost element is.
    #[test]
    fn list_is_unsupported_iff_innermost_element_is() {
        let supported = RivetType::List {
            inner: Box::new(RivetType::Text),
        };
        assert!(!supported.is_unsupported());

        let direct = RivetType::List {
            inner: Box::new(unsupported_geometry()),
        };
        assert!(direct.is_unsupported());

        let nested = RivetType::List {
            inner: Box::new(RivetType::List {
                inner: Box::new(unsupported_geometry()),
            }),
        };
        assert!(nested.is_unsupported());
    }

    /// The JSON form is an object tagged by `kind`, with fields inlined.
    #[test]
    fn json_serialization_uses_kind_tag() {
        let t = RivetType::Decimal {
            precision: 10,
            scale: 3,
        };
        let json: serde_json::Value =
            serde_json::from_str(&serde_json::to_string(&t).expect("serialize")).expect("parse");
        assert_eq!(json["kind"], "decimal");
        assert_eq!(json["precision"], 10);
        assert_eq!(json["scale"], 3);
    }

    /// Unit labels are persisted, so pin every one of them.
    #[test]
    fn time_unit_labels_are_stable() {
        assert_eq!(TimeUnit::Second.label(), "second");
        assert_eq!(TimeUnit::Millisecond.label(), "millisecond");
        assert_eq!(TimeUnit::Microsecond.label(), "microsecond");
        assert_eq!(TimeUnit::Nanosecond.label(), "nanosecond");
    }
}