Skip to main content

obj_core/codec/
schema.rs

1//! [`DynamicSchema`] — declarative description of a postcard-encoded
2//! payload's shape.
3//!
4//! Postcard is **not self-describing**: a postcard payload is a flat
5//! byte stream whose meaning depends entirely on the type that
6//! produced it. To migrate a v1 document into a v2 type at decode
7//! time the codec needs **some** description of the v1 wire shape;
8//! `DynamicSchema` is that description.
9//!
//! A `DynamicSchema` is pure data — a tree of enum variants with no
//! generics, no lifetimes beyond `'static`, and no allocations beyond
//! the `Vec` / `Box` / `String` storage that the schema value itself
//! carries. The schema describes the field order and per-field type
//! of a postcard payload; the walker
//! [`Dynamic::from_postcard_bytes`](crate::codec::Dynamic::from_postcard_bytes)
//! consumes both schema and bytes and produces a structured
//! [`Dynamic`](crate::codec::Dynamic) view.
17//!
18//! # Wire-format dependency
19//!
20//! `DynamicSchema` only describes postcard payloads. The mapping of
21//! schema variants → postcard byte-stream operations is the **only**
22//! postcard-specific knowledge in obj's migration path:
23//!
24//! | Schema variant       | Postcard wire shape                                        |
25//! |----------------------|------------------------------------------------------------|
26//! | `Null`               | (zero bytes — `()` / unit struct)                          |
27//! | `Bool`               | 1 byte (`0` = false, `1` = true)                           |
28//! | `U64`                | unsigned LEB128 varint                                     |
29//! | `I64`                | zigzag-encoded varint                                      |
30//! | `F64`                | 8 little-endian bytes                                      |
31//! | `String`             | varint length + UTF-8 bytes                                |
32//! | `Bytes`              | varint length + raw bytes                                  |
33//! | `Seq(elem)`          | varint length, then N elements of the inner schema         |
34//! | `Map(fields)`        | each `(name, schema)` decoded in order — *no* field names  |
35//! | `Enum(variants)`     | varint `u32` discriminant, then the matched variant's payload |
36//!
37//! Postcard treats a Rust struct as a **field-ordered tuple** with
38//! no names, so `Map` in this schema does NOT correspond to
39//! postcard's `serialize_map`; it corresponds to a sequence of
40//! per-field bytes. Field names are an obj-side convention used to
41//! tag fields in the resulting `Dynamic` for `get` / `set` / `remove`
42//! addressing.
43//!
//! For enums, postcard writes a varint `u32` discriminant followed
//! by the matched variant's payload. Unit variants carry **only**
//! the discriminant; newtype / tuple / struct variants follow it
//! with the inner type's field-ordered bytes. The payload has no
//! length prefix; the schema names every field so that
//! `Dynamic::get` can address it, exactly as `Map` does. Tuple
//! variants are represented by `Map` with synthetic numeric field
//! names (`"0"`, `"1"`, …) — there is no `Tuple` schema variant.
52//!
53//! # Power-of-ten posture
54//!
55//! - **Rule 1.** The walker that consumes a `DynamicSchema` uses an
56//!   explicit stack (`Vec<Frame>`), not Rust-language recursion. Tree
57//!   depth is bounded by [`MAX_SCHEMA_DEPTH`].
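//! - **Rule 2.** Every `Seq` decode reads its length as a varint and
//!   bounds the per-frame iteration by that length; pathologically
//!   large lengths are rejected before any allocation. A `Map` has
//!   no length on the wire (see the table above), so its iteration
//!   is bounded by the schema's own field list instead.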
61//! - **Rule 5.** Schema construction is pure-data — runtime
62//!   correctness checks live in the walker, not in `DynamicSchema`
63//!   itself.
64//! - **Rule 7.** Every fallible walker step propagates via `?`; no
65//!   `unwrap` / `expect` on byte-stream input.
66
67#![forbid(unsafe_code)]
68
/// Upper bound on how deeply a [`DynamicSchema`] tree may nest before
/// the walker refuses to descend any further. A schema that nests
/// past this limit is rejected with
/// [`Error::SchemaDepthExceeded`](crate::error::Error::SchemaDepthExceeded).
///
/// The limit is deliberately the same value (32) as
/// [`crate::codec::dynamic::MAX_DYNAMIC_DEPTH`]: that way the schema
/// walker can never build a `Dynamic` that is nested more deeply than
/// the tagged-format walker is able to re-encode.
pub const MAX_SCHEMA_DEPTH: usize = 32;
77
78/// Describes the byte-stream shape of a postcard-encoded payload at
79/// one version. See module docs for the variant ↔ wire-format
80/// mapping.
81///
82/// `DynamicSchema` is `'static`-friendly: literal schemas can live in
83/// const-fn-adjacent contexts (e.g. a `LazyLock` registry) without
84/// borrowing.
85///
86/// `Box`ing the inner schema in `Seq` keeps the enum's `size_of`
87/// reasonable (the alternative — `Vec<DynamicSchema>` of length 1 —
88/// is overkill for the single-element case and reads worse).
89#[derive(Debug, Clone, PartialEq, Eq)]
90#[non_exhaustive]
91pub enum DynamicSchema {
92    /// Unit / zero-byte field. Postcard emits no bytes for `()`.
93    Null,
94    /// Boolean — single byte `0` / `1`.
95    Bool,
96    /// Unsigned 64-bit integer (varint).
97    U64,
98    /// Signed 64-bit integer (zigzag varint).
99    I64,
100    /// 64-bit IEEE-754 float (8 LE bytes).
101    F64,
102    /// UTF-8 string (varint length + bytes).
103    String,
104    /// Raw byte sequence (varint length + bytes).
105    Bytes,
106    /// Variable-length sequence of `elem`-shaped values.
107    Seq(Box<DynamicSchema>),
108    /// Postcard-encoded struct described as an **ordered** list of
109    /// `(field_name, field_schema)` pairs. Order MUST match the
110    /// Rust declaration order of the struct that wrote the bytes —
111    /// postcard reads fields positionally, so a transposed schema
112    /// will mis-decode the payload as silently as any out-of-order
113    /// tuple destructure.
114    Map(Vec<(String, DynamicSchema)>),
115    /// Postcard-encoded enum: a varint `u32` discriminant followed by
116    /// the matched variant's payload bytes. `variants` MUST be sorted
117    /// strictly ascending by [`EnumVariantSchema::discriminant`]; the
118    /// walker binary-searches the list and a missing discriminant
119    /// surfaces as [`Error::SchemaTypeMismatch`](crate::error::Error::SchemaTypeMismatch).
120    ///
121    /// Unit variants set `payload = DynamicSchema::Null`. Newtype
122    /// variants set `payload` to the inner type's schema. Tuple and
123    /// struct variants set `payload = DynamicSchema::Map(...)` with
124    /// the variant's fields in declaration order; for tuple variants
125    /// the field names are the synthetic strings `"0"`, `"1"`, …
126    /// (postcard writes tuple variants positionally — the same wire
127    /// shape as a Rust struct's bytes).
128    Enum(Vec<EnumVariantSchema>),
129}
130
131/// One variant of a [`DynamicSchema::Enum`] description.
132///
133/// `discriminant` is the varint `u32` postcard writes for this
134/// variant (postcard uses the Rust enum's declaration order, so the
135/// first variant has discriminant `0`, the second `1`, and so on —
136/// `#[serde(other)]` / explicit discriminants change this). `name`
137/// is the Rust variant identifier carried through to the decoded
138/// [`Dynamic::Enum`](crate::codec::Dynamic) so a `Migrate::migrate`
139/// impl can distinguish variants by name. `payload` is the variant's
140/// inner shape, `Box`ed because `DynamicSchema` is self-referential.
141#[derive(Debug, Clone, PartialEq, Eq)]
142#[non_exhaustive]
143pub struct EnumVariantSchema {
144    /// Postcard's varint `u32` discriminant for this variant.
145    pub discriminant: u32,
146    /// The Rust variant identifier, carried into
147    /// [`Dynamic::Enum`](crate::codec::Dynamic) verbatim.
148    pub name: String,
149    /// Wire shape of the variant's payload. Use
150    /// [`DynamicSchema::Null`] for unit variants;
151    /// [`DynamicSchema::Map`] for tuple and struct variants
152    /// (synthetic numeric names for tuple variants); the inner
153    /// type's schema for newtype variants.
154    pub payload: Box<DynamicSchema>,
155}
156
157impl EnumVariantSchema {
158    /// Convenience constructor: lift a `(discriminant, name, payload)`
159    /// triple into an [`EnumVariantSchema`]. The `Box` around the
160    /// payload is added internally.
161    #[must_use]
162    pub fn new<S: Into<String>>(discriminant: u32, name: S, payload: DynamicSchema) -> Self {
163        Self {
164            discriminant,
165            name: name.into(),
166            payload: Box::new(payload),
167        }
168    }
169}
170
171impl DynamicSchema {
172    /// Convenience constructor for a `Seq` schema with `elem` as
173    /// the element shape.
174    #[must_use]
175    pub fn seq(elem: DynamicSchema) -> Self {
176        DynamicSchema::Seq(Box::new(elem))
177    }
178
179    /// Convenience constructor for a `Map` schema. `fields` is the
180    /// `(name, schema)` list in **declaration order**.
181    #[must_use]
182    pub fn map<I, S>(fields: I) -> Self
183    where
184        I: IntoIterator<Item = (S, DynamicSchema)>,
185        S: Into<String>,
186    {
187        DynamicSchema::Map(fields.into_iter().map(|(n, s)| (n.into(), s)).collect())
188    }
189
190    /// Convenience constructor for an `Enum` schema. `variants` is
191    /// the list of variants; the caller is responsible for keeping
192    /// them sorted ascending by discriminant — the walker
193    /// debug-asserts the invariant on the first decode of each
194    /// schema. Use [`EnumVariantSchema::new`] for each entry.
195    #[must_use]
196    pub fn enumeration<I>(variants: I) -> Self
197    where
198        I: IntoIterator<Item = EnumVariantSchema>,
199    {
200        let mut v: Vec<EnumVariantSchema> = variants.into_iter().collect();
201        v.sort_by_key(|e| e.discriminant);
202        DynamicSchema::Enum(v)
203    }
204}
205
206/// A type whose postcard wire shape is describable by a
207/// [`DynamicSchema`].
208///
209/// `Schema` is implemented for every `T: Document` (via the M9
210/// derive, see `obj_derive::Document`) and **also** for hand-written
211/// "historical" types that describe the wire shape of an older
212/// version of a Document. Historical types never need to be
213/// `Document`s themselves — they exist purely to describe bytes the
214/// current reader still has on disk.
215///
216/// Separating `Schema` from `Document` keeps the migration registry
217/// (#82) honest: a `historical_schemas()` entry refers to a
218/// `Schema`-only type that owns no collection name and has no
219/// `Document::VERSION` of its own (the `(u32, ...)` pair on the
220/// outside carries the version).
221///
222/// # Power-of-ten posture
223///
224/// - **Rule 9.** No `dyn`; static dispatch via the associated
225///   function below.
226/// - **Rule 5.** Implementations return a fresh `DynamicSchema` by
227///   value — pure data, no shared mutable state.
228pub trait Schema {
229    /// Return the postcard wire-shape description for `Self`.
230    ///
231    /// Implementations build the schema bottom-up from the variants
232    /// in [`DynamicSchema`].  The derive (M10 #82) emits a `Schema`
233    /// impl for every `#[derive(Document)]` type alongside the
234    /// `Document` impl; hand-impls follow the same shape.
235    #[must_use]
236    fn schema() -> DynamicSchema;
237}
238
239#[doc(hidden)]
240pub mod __private {
241    //! Internal helpers used by `obj_derive` and by hand-written
242    //! `historical_schemas()` implementations. Not part of the
243    //! public API surface — the module exists only so the derive
244    //! can refer to a stable path.
245
246    use super::{DynamicSchema, Schema};
247
248    /// Build a `historical_schemas()` return value from a list of
249    /// `(version, fn() -> DynamicSchema)` pairs. Useful when a
250    /// hand-impl wants the same look-and-feel as the derive output.
251    #[must_use]
252    pub fn schemas_from<const N: usize>(
253        entries: [(u32, fn() -> DynamicSchema); N],
254    ) -> Vec<(u32, DynamicSchema)> {
255        entries.into_iter().map(|(v, f)| (v, f())).collect()
256    }
257
258    /// Helper that materialises `<T as Schema>::schema()` — used by
259    /// the derive macro's `historical_schemas()` body so the
260    /// generated code stays short.
261    #[must_use]
262    pub fn schema_of<T: Schema>() -> DynamicSchema {
263        T::schema()
264    }
265}
266
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a unit variant (payload `Null`).
    fn unit_variant(discriminant: u32, name: &str) -> EnumVariantSchema {
        EnumVariantSchema::new(discriminant, name, DynamicSchema::Null)
    }

    /// `seq` must wrap the element schema in `Seq(Box<_>)`.
    #[test]
    fn seq_constructor_boxes_inner() {
        let built = DynamicSchema::seq(DynamicSchema::U64);
        let inner = match built {
            DynamicSchema::Seq(inner) => inner,
            other => panic!("expected Seq, got {other:?}"),
        };
        assert_eq!(*inner, DynamicSchema::U64);
    }

    /// Variants handed over out of order must come back sorted by
    /// discriminant, otherwise the walker's binary search would be
    /// operating on an unsorted list.
    #[test]
    fn enumeration_constructor_sorts_by_discriminant() {
        let built = DynamicSchema::enumeration([
            unit_variant(2, "B"),
            unit_variant(0, "Z"),
            unit_variant(1, "M"),
        ]);
        let variants = match built {
            DynamicSchema::Enum(variants) => variants,
            other => panic!("expected Enum, got {other:?}"),
        };
        let observed: Vec<(u32, &str)> = variants
            .iter()
            .map(|variant| (variant.discriminant, variant.name.as_str()))
            .collect();
        assert_eq!(observed, [(0, "Z"), (1, "M"), (2, "B")]);
    }

    /// `map` must keep the fields in exactly the order supplied.
    #[test]
    fn map_constructor_preserves_order() {
        let built = DynamicSchema::map([
            ("a", DynamicSchema::U64),
            ("b", DynamicSchema::String),
            ("c", DynamicSchema::Bool),
        ]);
        let fields = match built {
            DynamicSchema::Map(fields) => fields,
            other => panic!("expected Map, got {other:?}"),
        };
        let names: Vec<&str> = fields.iter().map(|(name, _)| name.as_str()).collect();
        assert_eq!(names, ["a", "b", "c"]);
    }
}