obj_core/codec/schema.rs
1//! [`DynamicSchema`] — declarative description of a postcard-encoded
2//! payload's shape.
3//!
4//! Postcard is **not self-describing**: a postcard payload is a flat
5//! byte stream whose meaning depends entirely on the type that
6//! produced it. To migrate a v1 document into a v2 type at decode
7//! time the codec needs **some** description of the v1 wire shape;
8//! `DynamicSchema` is that description.
9//!
//! A `DynamicSchema` is pure data — a tree of enum variants with no
//! generics, no lifetimes beyond `'static`, and no allocations beyond
//! the `String` / `Vec` / `Box` values the literal data itself
//! carries. The schema describes the field order and per-field type
//! of a postcard payload; the walker
//! [`Dynamic::from_postcard_bytes`](crate::codec::Dynamic::from_postcard_bytes)
//! consumes both schema and bytes and produces a structured
//! [`Dynamic`](crate::codec::Dynamic) view.
17//!
18//! # Wire-format dependency
19//!
20//! `DynamicSchema` only describes postcard payloads. The mapping of
21//! schema variants → postcard byte-stream operations is the **only**
22//! postcard-specific knowledge in obj's migration path:
23//!
24//! | Schema variant | Postcard wire shape |
25//! |----------------------|------------------------------------------------------------|
26//! | `Null` | (zero bytes — `()` / unit struct) |
27//! | `Bool` | 1 byte (`0` = false, `1` = true) |
28//! | `U64` | unsigned LEB128 varint |
29//! | `I64` | zigzag-encoded varint |
30//! | `F64` | 8 little-endian bytes |
31//! | `String` | varint length + UTF-8 bytes |
32//! | `Bytes` | varint length + raw bytes |
33//! | `Seq(elem)` | varint length, then N elements of the inner schema |
34//! | `Map(fields)` | each `(name, schema)` decoded in order — *no* field names |
35//! | `Enum(variants)` | varint `u32` discriminant, then the matched variant's payload |
36//!
37//! Postcard treats a Rust struct as a **field-ordered tuple** with
38//! no names, so `Map` in this schema does NOT correspond to
39//! postcard's `serialize_map`; it corresponds to a sequence of
40//! per-field bytes. Field names are an obj-side convention used to
41//! tag fields in the resulting `Dynamic` for `get` / `set` / `remove`
42//! addressing.
43//!
44//! For enums, postcard writes a varint `u32` discriminant followed
45//! by the matched variant's payload. Unit variants carry **only**
46//! the discriminant; newtype / tuple / struct variants follow with
47//! the inner type's field-ordered bytes (no length prefix — the
48//! schema names every field for `Dynamic::get` addressing the same
49//! way `Map` does). Tuple variants are represented by `Map` with
50//! synthetic numeric field names (`"0"`, `"1"`, …) — there is no
51//! `Tuple` schema variant.
52//!
53//! # Power-of-ten posture
54//!
55//! - **Rule 1.** The walker that consumes a `DynamicSchema` uses an
56//! explicit stack (`Vec<Frame>`), not Rust-language recursion. Tree
57//! depth is bounded by [`MAX_SCHEMA_DEPTH`].
//! - **Rule 2.** Every `Seq` decode reads its length as a varint and
//!   bounds the per-frame iteration by that length; pathologically
//!   large lengths are rejected before any allocation. A `Map` has no
//!   length on the wire, so its iteration is bounded by the number of
//!   fields the schema lists.
61//! - **Rule 5.** Schema construction is pure-data — runtime
62//! correctness checks live in the walker, not in `DynamicSchema`
63//! itself.
64//! - **Rule 7.** Every fallible walker step propagates via `?`; no
65//! `unwrap` / `expect` on byte-stream input.
66
67#![forbid(unsafe_code)]
68
/// Upper bound on how deeply a [`DynamicSchema`] tree may nest before
/// the walker refuses to descend further. A schema nested beyond this
/// limit is reported as
/// [`Error::SchemaDepthExceeded`](crate::error::Error::SchemaDepthExceeded).
///
/// The value (32) is kept equal to
/// [`crate::codec::dynamic::MAX_DYNAMIC_DEPTH`] so that the schema
/// walker can never build a `Dynamic` deeper than the tagged-format
/// walker is able to re-encode.
pub const MAX_SCHEMA_DEPTH: usize = 32;
77
78/// Describes the byte-stream shape of a postcard-encoded payload at
79/// one version. See module docs for the variant ↔ wire-format
80/// mapping.
81///
82/// `DynamicSchema` is `'static`-friendly: literal schemas can live in
83/// const-fn-adjacent contexts (e.g. a `LazyLock` registry) without
84/// borrowing.
85///
86/// `Box`ing the inner schema in `Seq` keeps the enum's `size_of`
87/// reasonable (the alternative — `Vec<DynamicSchema>` of length 1 —
88/// is overkill for the single-element case and reads worse).
89#[derive(Debug, Clone, PartialEq, Eq)]
90#[non_exhaustive]
91pub enum DynamicSchema {
92 /// Unit / zero-byte field. Postcard emits no bytes for `()`.
93 Null,
94 /// Boolean — single byte `0` / `1`.
95 Bool,
96 /// Unsigned 64-bit integer (varint).
97 U64,
98 /// Signed 64-bit integer (zigzag varint).
99 I64,
100 /// 64-bit IEEE-754 float (8 LE bytes).
101 F64,
102 /// UTF-8 string (varint length + bytes).
103 String,
104 /// Raw byte sequence (varint length + bytes).
105 Bytes,
106 /// Variable-length sequence of `elem`-shaped values.
107 Seq(Box<DynamicSchema>),
108 /// Postcard-encoded struct described as an **ordered** list of
109 /// `(field_name, field_schema)` pairs. Order MUST match the
110 /// Rust declaration order of the struct that wrote the bytes —
111 /// postcard reads fields positionally, so a transposed schema
112 /// will mis-decode the payload as silently as any out-of-order
113 /// tuple destructure.
114 Map(Vec<(String, DynamicSchema)>),
115 /// Postcard-encoded enum: a varint `u32` discriminant followed by
116 /// the matched variant's payload bytes. `variants` MUST be sorted
117 /// strictly ascending by [`EnumVariantSchema::discriminant`]; the
118 /// walker binary-searches the list and a missing discriminant
119 /// surfaces as [`Error::SchemaTypeMismatch`](crate::error::Error::SchemaTypeMismatch).
120 ///
121 /// Unit variants set `payload = DynamicSchema::Null`. Newtype
122 /// variants set `payload` to the inner type's schema. Tuple and
123 /// struct variants set `payload = DynamicSchema::Map(...)` with
124 /// the variant's fields in declaration order; for tuple variants
125 /// the field names are the synthetic strings `"0"`, `"1"`, …
126 /// (postcard writes tuple variants positionally — the same wire
127 /// shape as a Rust struct's bytes).
128 Enum(Vec<EnumVariantSchema>),
129}
130
131/// One variant of a [`DynamicSchema::Enum`] description.
132///
133/// `discriminant` is the varint `u32` postcard writes for this
134/// variant (postcard uses the Rust enum's declaration order, so the
135/// first variant has discriminant `0`, the second `1`, and so on —
136/// `#[serde(other)]` / explicit discriminants change this). `name`
137/// is the Rust variant identifier carried through to the decoded
138/// [`Dynamic::Enum`](crate::codec::Dynamic) so a `Migrate::migrate`
139/// impl can distinguish variants by name. `payload` is the variant's
140/// inner shape, `Box`ed because `DynamicSchema` is self-referential.
141#[derive(Debug, Clone, PartialEq, Eq)]
142#[non_exhaustive]
143pub struct EnumVariantSchema {
144 /// Postcard's varint `u32` discriminant for this variant.
145 pub discriminant: u32,
146 /// The Rust variant identifier, carried into
147 /// [`Dynamic::Enum`](crate::codec::Dynamic) verbatim.
148 pub name: String,
149 /// Wire shape of the variant's payload. Use
150 /// [`DynamicSchema::Null`] for unit variants;
151 /// [`DynamicSchema::Map`] for tuple and struct variants
152 /// (synthetic numeric names for tuple variants); the inner
153 /// type's schema for newtype variants.
154 pub payload: Box<DynamicSchema>,
155}
156
157impl EnumVariantSchema {
158 /// Convenience constructor: lift a `(discriminant, name, payload)`
159 /// triple into an [`EnumVariantSchema`]. The `Box` around the
160 /// payload is added internally.
161 #[must_use]
162 pub fn new<S: Into<String>>(discriminant: u32, name: S, payload: DynamicSchema) -> Self {
163 Self {
164 discriminant,
165 name: name.into(),
166 payload: Box::new(payload),
167 }
168 }
169}
170
171impl DynamicSchema {
172 /// Convenience constructor for a `Seq` schema with `elem` as
173 /// the element shape.
174 #[must_use]
175 pub fn seq(elem: DynamicSchema) -> Self {
176 DynamicSchema::Seq(Box::new(elem))
177 }
178
179 /// Convenience constructor for a `Map` schema. `fields` is the
180 /// `(name, schema)` list in **declaration order**.
181 #[must_use]
182 pub fn map<I, S>(fields: I) -> Self
183 where
184 I: IntoIterator<Item = (S, DynamicSchema)>,
185 S: Into<String>,
186 {
187 DynamicSchema::Map(fields.into_iter().map(|(n, s)| (n.into(), s)).collect())
188 }
189
190 /// Convenience constructor for an `Enum` schema. `variants` is
191 /// the list of variants; the caller is responsible for keeping
192 /// them sorted ascending by discriminant — the walker
193 /// debug-asserts the invariant on the first decode of each
194 /// schema. Use [`EnumVariantSchema::new`] for each entry.
195 #[must_use]
196 pub fn enumeration<I>(variants: I) -> Self
197 where
198 I: IntoIterator<Item = EnumVariantSchema>,
199 {
200 let mut v: Vec<EnumVariantSchema> = variants.into_iter().collect();
201 v.sort_by_key(|e| e.discriminant);
202 DynamicSchema::Enum(v)
203 }
204}
205
206/// A type whose postcard wire shape is describable by a
207/// [`DynamicSchema`].
208///
209/// `Schema` is implemented for every `T: Document` (via the M9
210/// derive, see `obj_derive::Document`) and **also** for hand-written
211/// "historical" types that describe the wire shape of an older
212/// version of a Document. Historical types never need to be
213/// `Document`s themselves — they exist purely to describe bytes the
214/// current reader still has on disk.
215///
216/// Separating `Schema` from `Document` keeps the migration registry
217/// (#82) honest: a `historical_schemas()` entry refers to a
218/// `Schema`-only type that owns no collection name and has no
219/// `Document::VERSION` of its own (the `(u32, ...)` pair on the
220/// outside carries the version).
221///
222/// # Power-of-ten posture
223///
224/// - **Rule 9.** No `dyn`; static dispatch via the associated
225/// function below.
226/// - **Rule 5.** Implementations return a fresh `DynamicSchema` by
227/// value — pure data, no shared mutable state.
228pub trait Schema {
229 /// Return the postcard wire-shape description for `Self`.
230 ///
231 /// Implementations build the schema bottom-up from the variants
232 /// in [`DynamicSchema`]. The derive (M10 #82) emits a `Schema`
233 /// impl for every `#[derive(Document)]` type alongside the
234 /// `Document` impl; hand-impls follow the same shape.
235 #[must_use]
236 fn schema() -> DynamicSchema;
237}
238
239#[doc(hidden)]
240pub mod __private {
241 //! Internal helpers used by `obj_derive` and by hand-written
242 //! `historical_schemas()` implementations. Not part of the
243 //! public API surface — the module exists only so the derive
244 //! can refer to a stable path.
245
246 use super::{DynamicSchema, Schema};
247
248 /// Build a `historical_schemas()` return value from a list of
249 /// `(version, fn() -> DynamicSchema)` pairs. Useful when a
250 /// hand-impl wants the same look-and-feel as the derive output.
251 #[must_use]
252 pub fn schemas_from<const N: usize>(
253 entries: [(u32, fn() -> DynamicSchema); N],
254 ) -> Vec<(u32, DynamicSchema)> {
255 entries.into_iter().map(|(v, f)| (v, f())).collect()
256 }
257
258 /// Helper that materialises `<T as Schema>::schema()` — used by
259 /// the derive macro's `historical_schemas()` body so the
260 /// generated code stays short.
261 #[must_use]
262 pub fn schema_of<T: Schema>() -> DynamicSchema {
263 T::schema()
264 }
265}
266
#[cfg(test)]
mod tests {
    use super::*;

    /// `DynamicSchema::seq` wraps the element schema in a boxed `Seq`.
    #[test]
    fn seq_constructor_boxes_inner() {
        let schema = DynamicSchema::seq(DynamicSchema::U64);
        match schema {
            DynamicSchema::Seq(elem) => {
                let unboxed: DynamicSchema = *elem;
                assert_eq!(unboxed, DynamicSchema::U64);
            }
            other => panic!("expected Seq, got {other:?}"),
        }
    }

    /// Variants handed over out of order come back sorted by
    /// discriminant, which is what the walker's binary search needs.
    #[test]
    fn enumeration_constructor_sorts_by_discriminant() {
        let unit = |d: u32, n: &str| EnumVariantSchema::new(d, n, DynamicSchema::Null);
        let schema = DynamicSchema::enumeration([unit(2, "B"), unit(0, "Z"), unit(1, "M")]);
        match schema {
            DynamicSchema::Enum(variants) => {
                // Comparing the whole list at once also pins its length.
                let order: Vec<(u32, &str)> = variants
                    .iter()
                    .map(|v| (v.discriminant, v.name.as_str()))
                    .collect();
                assert_eq!(order, [(0, "Z"), (1, "M"), (2, "B")]);
            }
            other => panic!("expected Enum, got {other:?}"),
        }
    }

    /// `DynamicSchema::map` keeps the fields in the order supplied.
    #[test]
    fn map_constructor_preserves_order() {
        let schema = DynamicSchema::map([
            ("a", DynamicSchema::U64),
            ("b", DynamicSchema::String),
            ("c", DynamicSchema::Bool),
        ]);
        match schema {
            DynamicSchema::Map(fields) => {
                let names: Vec<&str> = fields.iter().map(|(name, _)| name.as_str()).collect();
                assert_eq!(names, ["a", "b", "c"]);
            }
            other => panic!("expected Map, got {other:?}"),
        }
    }
}