Skip to main content

obj_core/index/
extract.rs

1//! Field extraction: `Document` → encoded index keys.
2//!
3//! [`extract_index_keys`] is the bridge between a user `Document`
4//! and the per-index B-trees of #57's catalog layer. For each
5//! declared index it walks the document's field path(s) and hands
6//! the resolved [`Dynamic`] values to `encode_index_key` (#55).
7//!
8//! # postcard is not self-describing
9//!
10//! The M5 [`Dynamic`] type ships a `from_postcard_bytes` decoder
11//! that ONLY accepts the tagged-Dynamic wire format — NOT raw
12//! native-postcard payloads. M7 field extraction cannot use that
13//! decoder for user documents (which are always native postcard).
14//!
//! The workaround is a dedicated serde-driven reflection: a
//! `DynamicSerializer` walks a value's `serde::Serialize`
//! impl and emits a `Dynamic` tree with `Dynamic::Map` for every
//! struct and `Dynamic::Seq` for every sequence. The result is
//! the same shape `Dynamic::get` is built for. Since #92 the
//! extractor no longer reflects the whole document: a
//! `FieldProjector` visits the top-level fields, reflects only the
//! ones named in `IndexSpec::key_paths` through `DynamicSerializer`,
//! and discards every other field without building a `Dynamic`.
22//!
23//! # Path limitations
24//!
25//! M7 supports **top-level field paths only** — a single field name
26//! within the document's struct. Dotted paths (`"address.city"`),
27//! array indexing (`"tags[0]"`), and `JSONPath` syntax are out of
28//! scope. The `IndexSpec::key_paths` vector is a list of top-level
29//! field names; each entry is one `Dynamic::get` lookup. The
30//! limitation is documented in `docs/format.md` § Indexes.
31//!
32//! # Power-of-ten posture
33//!
34//! - **Rule 1.** No recursion in the extractor itself — the
35//!   serializer is the only place where the call graph naturally
36//!   reflects the document structure, and it carries an explicit
37//!   `MAX_REFLECT_DEPTH` bound that mirrors `MAX_DYNAMIC_DEPTH`.
//! - **Rule 2.** Per-key-path iteration is bounded by the spec's
//!   `key_paths.len()`. `Each`-kind extraction is bounded by
//!   [`MAX_EACH_ENTRIES`] (16 384) — beyond which extraction errors
//!   with [`crate::Error::EachIndexTooLarge`] (the dedicated variant
//!   from #59, which replaced the earlier `Error::InvalidArgument`
//!   placeholder).
44//! - **Rule 4.** `extract_index_keys` is short — per-kind helpers
45//!   carry the heavy lifting (one helper per kind).
46//! - **Rule 7.** No `unwrap` / `expect` on the production path.
47//!   Missing field / wrong type surface as the specific
48//!   `Error::IndexField*` variants.
49
50#![forbid(unsafe_code)]
51
52use std::collections::BTreeMap;
53
54use serde::{ser, Serialize};
55
56use crate::codec::{Document, Dynamic};
57use crate::error::{Error, Result};
58use crate::index::key::{encode_field, encode_index_key, EncodedIndexKey};
59use crate::index::spec::{IndexKind, IndexSpec};
60
/// Upper bound on how deeply the document-reflection walk may nest.
///
/// Kept in step with the `MAX_DYNAMIC_DEPTH` bound in
/// [`crate::codec::dynamic`]; it guards against pathological
/// nested-struct inputs that would otherwise grow a `Dynamic` tree
/// without limit.
const MAX_REFLECT_DEPTH: usize = 32;

/// Upper bound on the number of keys one `Each` extraction may emit.
///
/// 16 384 (16 Ki) is the same ceiling used for the per-document key
/// set; a sequence longer than this points at a runaway data shape
/// rather than a genuinely indexable field.
pub const MAX_EACH_ENTRIES: usize = 16 * 1024;
72
73/// Extract the set of encoded index keys for `doc` under `spec`.
74///
75/// - For `Standard`, `Unique`, `Composite`: returns exactly one
76///   [`EncodedIndexKey`].
77/// - For `Each`: returns one entry per element of the sequence at
78///   the configured path. Empty sequence → empty `Vec` (no index
79///   work for this doc on this index).
80///
81/// `collection` is plumbed through purely so error variants carry
82/// the collection name in their context (the catalog reconciler
83/// in #57 calls this per-collection).
84///
85/// # Errors
86///
87/// - [`Error::IndexFieldMissing`] if the configured path is absent.
88/// - [`Error::IndexFieldTypeMismatch`] for `Each` on a non-sequence,
89///   `Composite` field on a `Map`, etc.
90/// - [`Error::InvalidArgument`] if `Each` would emit more than
91///   [`MAX_EACH_ENTRIES`].
92/// - Propagates encoding errors from [`encode_index_key`].
93pub fn extract_index_keys<T: Document>(
94    collection: &str,
95    spec: &IndexSpec,
96    doc: &T,
97) -> Result<Vec<EncodedIndexKey>> {
98    // #92: project ONLY the indexed field path(s) out of `doc` instead
99    // of materializing the whole document via `to_dynamic`. The
100    // projecting serializer walks the top-level struct and emits a
101    // `Dynamic` for the named field(s) alone — every other field
102    // (notably a large `payload`) is visited by a no-op serializer
103    // that allocates nothing. The resolved `Dynamic`s are then handed
104    // to the SAME `encode_index_key` the full-doc path used, so the
105    // produced `EncodedIndexKey` is byte-identical (see the
106    // `project_*_byte_identical_to_full_doc` tests).
107    let fields = project_fields(collection, spec, doc)?;
108    match spec.kind {
109        IndexKind::Standard | IndexKind::Unique => extract_scalar(spec, &fields).map(|k| vec![k]),
110        IndexKind::Each => extract_each(collection, spec, &fields),
111        IndexKind::Composite => extract_composite(spec, &fields).map(|k| vec![k]),
112    }
113}
114
115/// Encode the single resolved scalar field. Used by `Standard` and
116/// `Unique` kinds. `fields` carries exactly one entry — the value at
117/// `spec.key_paths[0]` resolved by [`project_fields`].
118fn extract_scalar(spec: &IndexSpec, fields: &[Dynamic]) -> Result<EncodedIndexKey> {
119    debug_assert_eq!(fields.len(), 1, "scalar kind projects exactly one field");
120    encode_index_key(spec, fields)
121}
122
123/// Encode one key per element of the resolved sequence field.
124fn extract_each(
125    collection: &str,
126    spec: &IndexSpec,
127    fields: &[Dynamic],
128) -> Result<Vec<EncodedIndexKey>> {
129    debug_assert_eq!(fields.len(), 1, "Each kind projects exactly one field");
130    let value = &fields[0];
131    let Dynamic::Seq(items) = value else {
132        return Err(Error::IndexFieldTypeMismatch {
133            collection: collection.to_owned(),
134            index: spec.name.clone(),
135            path: spec.key_paths[0].clone(),
136            expected: "Seq",
137            found: dynamic_kind_name(value),
138        });
139    };
140    if items.len() > MAX_EACH_ENTRIES {
141        return Err(Error::EachIndexTooLarge {
142            collection: collection.to_owned(),
143            index: spec.name.clone(),
144            len: items.len(),
145            max: MAX_EACH_ENTRIES,
146        });
147    }
148    let mut out = Vec::with_capacity(items.len());
149    for element in items {
150        out.push(encode_field(element)?);
151    }
152    Ok(out)
153}
154
155/// Encode the resolved composite fields into the envelope.
156fn extract_composite(spec: &IndexSpec, fields: &[Dynamic]) -> Result<EncodedIndexKey> {
157    debug_assert!(fields.len() >= 2, "composite projects ≥ 2 fields");
158    encode_index_key(spec, fields)
159}
160
161/// Project the `Dynamic` value at each `spec.key_paths` entry out of
162/// `doc`, visiting ONLY those fields (every other field is discarded
163/// by a no-op serializer — see [`NullSerializer`]). Returns one
164/// resolved [`Dynamic`] per path, in `key_paths` order.
165///
166/// # Errors
167///
168/// - [`Error::IndexFieldMissing`] if a path is absent from `doc`'s
169///   top-level struct (parity with the old `lookup_field` →
170///   `Dynamic::get` → `None` path).
171/// - [`Error::InvalidArgument`] if `doc` rejects reflection (a
172///   non-struct/map top-level, depth overflow, or a `Serialize` impl
173///   error) — collapsed from [`DynamicSerError`].
174fn project_fields<T: Document>(
175    collection: &str,
176    spec: &IndexSpec,
177    doc: &T,
178) -> Result<Vec<Dynamic>> {
179    debug_assert!(!spec.key_paths.is_empty(), "spec has ≥ 1 key path");
180    let projector = FieldProjector::new(&spec.key_paths);
181    let mut resolved = doc.serialize(projector).map_err(Error::from)?;
182    let mut out = Vec::with_capacity(spec.key_paths.len());
183    for path in &spec.key_paths {
184        let value = resolved
185            .remove(path)
186            .ok_or_else(|| Error::IndexFieldMissing {
187                collection: collection.to_owned(),
188                index: spec.name.clone(),
189                path: path.clone(),
190            })?;
191        out.push(value);
192    }
193    Ok(out)
194}
195
196/// Diagnostic name of a `Dynamic` variant, used in
197/// [`Error::IndexFieldTypeMismatch::found`].
198fn dynamic_kind_name(value: &Dynamic) -> &'static str {
199    match value {
200        Dynamic::Null => "Null",
201        Dynamic::Bool(_) => "Bool",
202        Dynamic::U64(_) => "U64",
203        Dynamic::I64(_) => "I64",
204        Dynamic::F64(_) => "F64",
205        Dynamic::String(_) => "String",
206        Dynamic::Bytes(_) => "Bytes",
207        Dynamic::Seq(_) => "Seq",
208        Dynamic::Map(_) => "Map",
209        Dynamic::Enum { .. } => "Enum",
210    }
211}
212
/// Reflect a whole `T: Serialize` value into a `Dynamic` tree by
/// driving serde through a [`DynamicSerializer`] that starts at
/// depth 0.
///
/// This is **not** a postcard round-trip: postcard is not
/// self-describing and cannot recover field names, so the tree is
/// built from the `Serialize` impl directly.
///
/// # #92 status
///
/// Compiled for tests only. `extract_index_keys` goes through
/// `project_fields` / [`FieldProjector`] and does not call this
/// function; [`DynamicSerializer`] itself stays in the production
/// build because the projector uses it to reflect each wanted field.
///
/// # Errors
///
/// Any [`DynamicSerError`] raised during serialization, converted
/// into [`Error`].
#[cfg(test)]
fn to_dynamic<T: Serialize>(value: &T) -> Result<Dynamic> {
    value
        .serialize(DynamicSerializer { depth: 0 })
        .map_err(Error::from)
}
238
239// ---------- FieldProjector (#92) ---------------------------------
240//
241// The field-projecting serializer. Where `DynamicSerializer` builds a
242// `Dynamic` for the WHOLE document, `FieldProjector` walks only the
243// top-level struct (or map) and emits a `Dynamic` for the field(s)
244// named in `wanted`. Every other field is fed to [`NullSerializer`],
245// which allocates nothing — in particular it never clones a large
246// `payload: Vec<u8>` into a `Dynamic::Bytes`. The named field's value
247// IS reflected through the full `DynamicSerializer`, so the resolved
248// `Dynamic` is bit-for-bit what the full-doc path would have produced
249// for that field (the byte-identity guardrail).
250
/// Top-level field-projecting `Serializer`.
///
/// Serializing a document through it yields a [`ProjectedFields`]
/// map holding the reflected value of each field whose name appears
/// in `wanted`; every other field is discarded.
///
/// Only struct / struct-variant / map top levels (optionally behind
/// a newtype struct or `Some`) can contribute entries. Scalar, unit,
/// `None`, unit-variant and newtype-variant top levels produce an
/// empty map, which [`project_fields`] reports as
/// [`Error::IndexFieldMissing`]. Sequence and tuple top levels are
/// rejected with a [`DynamicSerError`] instead.
struct FieldProjector<'w> {
    /// Field names to collect, borrowed from the index spec.
    wanted: &'w [String],
}

impl<'w> FieldProjector<'w> {
    /// Create a projector that collects the fields named in `wanted`.
    fn new(wanted: &'w [String]) -> Self {
        FieldProjector { wanted }
    }
}
268
269/// The accumulator a `FieldProjector` hands back: the resolved
270/// `Dynamic` for each matched field name. A `BTreeMap` (not a `Vec`)
271/// so `project_fields` can pull paths out in `key_paths` order and so
272/// a duplicate field name (impossible for a real struct) cannot grow
273/// the result unboundedly.
274type ProjectedFields = BTreeMap<String, Dynamic>;
275
276/// Builder shared by `SerializeStruct` / `SerializeStructVariant` /
277/// `SerializeMap` for the projecting walk. Holds the wanted-name set
278/// and the fields matched so far.
279struct ProjectBuilder<'w> {
280    wanted: &'w [String],
281    collected: ProjectedFields,
282    pending_key: Option<String>,
283}
284
285impl<'w> ProjectBuilder<'w> {
286    fn new(wanted: &'w [String]) -> Self {
287        Self {
288            wanted,
289            collected: BTreeMap::new(),
290            pending_key: None,
291        }
292    }
293
294    /// Reflect `value` into a `Dynamic` only if `key` is wanted;
295    /// otherwise discard it through [`NullSerializer`] (no allocation).
296    fn take_field<T: ?Sized + Serialize>(&mut self, key: &str, value: &T) -> DynRes<()> {
297        if self.wanted.iter().any(|w| w == key) {
298            let val = value.serialize(DynamicSerializer { depth: 1 })?;
299            self.collected.insert(key.to_owned(), val);
300        } else {
301            value.serialize(NullSerializer)?;
302        }
303        Ok(())
304    }
305}
306
307#[allow(clippy::unused_self)] // serde's by-value receiver; mirrors DynamicSerializer
308impl<'w> ser::Serializer for FieldProjector<'w> {
309    type Ok = ProjectedFields;
310    type Error = DynamicSerError;
311    type SerializeSeq = ser::Impossible<ProjectedFields, DynamicSerError>;
312    type SerializeTuple = ser::Impossible<ProjectedFields, DynamicSerError>;
313    type SerializeTupleStruct = ser::Impossible<ProjectedFields, DynamicSerError>;
314    type SerializeTupleVariant = ser::Impossible<ProjectedFields, DynamicSerError>;
315    type SerializeMap = ProjectBuilder<'w>;
316    type SerializeStruct = ProjectBuilder<'w>;
317    type SerializeStructVariant = ProjectBuilder<'w>;
318
319    fn serialize_struct(self, _name: &'static str, _len: usize) -> DynRes<ProjectBuilder<'w>> {
320        Ok(ProjectBuilder::new(self.wanted))
321    }
322    fn serialize_struct_variant(
323        self,
324        _name: &'static str,
325        _variant_index: u32,
326        _variant: &'static str,
327        _len: usize,
328    ) -> DynRes<ProjectBuilder<'w>> {
329        Ok(ProjectBuilder::new(self.wanted))
330    }
331    fn serialize_map(self, _len: Option<usize>) -> DynRes<ProjectBuilder<'w>> {
332        Ok(ProjectBuilder::new(self.wanted))
333    }
334    // Transparent wrappers recurse with the same projector so a
335    // `#[serde(transparent)]` newtype or an `Option<Struct>` top level
336    // still projects the inner struct's fields (parity with
337    // `DynamicSerializer`'s pass-through of these shapes).
338    fn serialize_newtype_struct<T: ?Sized + Serialize>(
339        self,
340        _name: &'static str,
341        v: &T,
342    ) -> DynRes<ProjectedFields> {
343        v.serialize(self)
344    }
345    fn serialize_some<T: ?Sized + Serialize>(self, v: &T) -> DynRes<ProjectedFields> {
346        v.serialize(self)
347    }
348    // Every non-struct top level resolves to "no named fields" — an
349    // empty map. `project_fields` turns that into IndexFieldMissing,
350    // matching the old `Dynamic::get` on a non-Map returning `None`.
351    fn serialize_i128(self, _v: i128) -> DynRes<ProjectedFields> {
352        Ok(BTreeMap::new())
353    }
354    fn serialize_u128(self, _v: u128) -> DynRes<ProjectedFields> {
355        Ok(BTreeMap::new())
356    }
357    fn serialize_bool(self, _v: bool) -> DynRes<ProjectedFields> {
358        Ok(BTreeMap::new())
359    }
360    fn serialize_i8(self, _v: i8) -> DynRes<ProjectedFields> {
361        Ok(BTreeMap::new())
362    }
363    fn serialize_i16(self, _v: i16) -> DynRes<ProjectedFields> {
364        Ok(BTreeMap::new())
365    }
366    fn serialize_i32(self, _v: i32) -> DynRes<ProjectedFields> {
367        Ok(BTreeMap::new())
368    }
369    fn serialize_i64(self, _v: i64) -> DynRes<ProjectedFields> {
370        Ok(BTreeMap::new())
371    }
372    fn serialize_u8(self, _v: u8) -> DynRes<ProjectedFields> {
373        Ok(BTreeMap::new())
374    }
375    fn serialize_u16(self, _v: u16) -> DynRes<ProjectedFields> {
376        Ok(BTreeMap::new())
377    }
378    fn serialize_u32(self, _v: u32) -> DynRes<ProjectedFields> {
379        Ok(BTreeMap::new())
380    }
381    fn serialize_u64(self, _v: u64) -> DynRes<ProjectedFields> {
382        Ok(BTreeMap::new())
383    }
384    fn serialize_f32(self, _v: f32) -> DynRes<ProjectedFields> {
385        Ok(BTreeMap::new())
386    }
387    fn serialize_f64(self, _v: f64) -> DynRes<ProjectedFields> {
388        Ok(BTreeMap::new())
389    }
390    fn serialize_char(self, _v: char) -> DynRes<ProjectedFields> {
391        Ok(BTreeMap::new())
392    }
393    fn serialize_str(self, _v: &str) -> DynRes<ProjectedFields> {
394        Ok(BTreeMap::new())
395    }
396    fn serialize_bytes(self, _v: &[u8]) -> DynRes<ProjectedFields> {
397        Ok(BTreeMap::new())
398    }
399    fn serialize_none(self) -> DynRes<ProjectedFields> {
400        Ok(BTreeMap::new())
401    }
402    fn serialize_unit(self) -> DynRes<ProjectedFields> {
403        Ok(BTreeMap::new())
404    }
405    fn serialize_unit_struct(self, _name: &'static str) -> DynRes<ProjectedFields> {
406        Ok(BTreeMap::new())
407    }
408    fn serialize_unit_variant(
409        self,
410        _name: &'static str,
411        _variant_index: u32,
412        _variant: &'static str,
413    ) -> DynRes<ProjectedFields> {
414        Ok(BTreeMap::new())
415    }
416    fn serialize_newtype_variant<T: ?Sized + Serialize>(
417        self,
418        _name: &'static str,
419        _variant_index: u32,
420        _variant: &'static str,
421        _v: &T,
422    ) -> DynRes<ProjectedFields> {
423        Ok(BTreeMap::new())
424    }
425    fn serialize_seq(self, _len: Option<usize>) -> DynRes<Self::SerializeSeq> {
426        Err(seq_unsupported())
427    }
428    fn serialize_tuple(self, _len: usize) -> DynRes<Self::SerializeTuple> {
429        Err(seq_unsupported())
430    }
431    fn serialize_tuple_struct(
432        self,
433        _name: &'static str,
434        _len: usize,
435    ) -> DynRes<Self::SerializeTupleStruct> {
436        Err(seq_unsupported())
437    }
438    fn serialize_tuple_variant(
439        self,
440        _name: &'static str,
441        _variant_index: u32,
442        _variant: &'static str,
443        _len: usize,
444    ) -> DynRes<Self::SerializeTupleVariant> {
445        Err(seq_unsupported())
446    }
447}
448
449/// A top-level tuple / tuple-struct / tuple-variant has positional
450/// (not named) fields and so contributes no addressable index field —
451/// the same outcome as `Dynamic::get` on a non-Map. Surface it as the
452/// reflection-rejected error rather than silently succeeding with no
453/// fields, so a mis-shaped spec is not masked.
454fn seq_unsupported() -> DynamicSerError {
455    DynamicSerError("index extraction: top-level tuple has no named fields to project".to_owned())
456}
457
458impl ser::SerializeStruct for ProjectBuilder<'_> {
459    type Ok = ProjectedFields;
460    type Error = DynamicSerError;
461    fn serialize_field<T: ?Sized + Serialize>(
462        &mut self,
463        key: &'static str,
464        value: &T,
465    ) -> DynRes<()> {
466        self.take_field(key, value)
467    }
468    fn end(self) -> DynRes<ProjectedFields> {
469        Ok(self.collected)
470    }
471}
472
473impl ser::SerializeStructVariant for ProjectBuilder<'_> {
474    type Ok = ProjectedFields;
475    type Error = DynamicSerError;
476    fn serialize_field<T: ?Sized + Serialize>(
477        &mut self,
478        key: &'static str,
479        value: &T,
480    ) -> DynRes<()> {
481        self.take_field(key, value)
482    }
483    fn end(self) -> DynRes<ProjectedFields> {
484        Ok(self.collected)
485    }
486}
487
488impl ser::SerializeMap for ProjectBuilder<'_> {
489    type Ok = ProjectedFields;
490    type Error = DynamicSerError;
491    fn serialize_key<T: ?Sized + Serialize>(&mut self, key: &T) -> DynRes<()> {
492        // Resolve the key to a String the same way `DynamicSerializer`'s
493        // MapBuilder does, so a stringable map key projects identically
494        // to a struct field of the same name.
495        let key_dyn = key.serialize(DynamicSerializer { depth: 1 })?;
496        let key_string = match key_dyn {
497            Dynamic::String(s) => s,
498            Dynamic::U64(n) => n.to_string(),
499            Dynamic::I64(n) => n.to_string(),
500            Dynamic::Bool(b) => b.to_string(),
501            other => {
502                return Err(DynamicSerError(format!(
503                    "map key must be stringable (got {other:?})"
504                )));
505            }
506        };
507        self.pending_key = Some(key_string);
508        Ok(())
509    }
510    fn serialize_value<T: ?Sized + Serialize>(&mut self, value: &T) -> DynRes<()> {
511        let key = self
512            .pending_key
513            .take()
514            .ok_or_else(|| DynamicSerError("map value without preceding key".to_owned()))?;
515        self.take_field(&key, value)
516    }
517    fn end(self) -> DynRes<ProjectedFields> {
518        Ok(self.collected)
519    }
520}
521
522// ---------- NullSerializer (#92) ---------------------------------
523//
524// A `Serializer` that discards everything it visits, allocating
525// nothing. Used by `ProjectBuilder` for the document's unwanted
526// fields — most importantly the multi-hundred-byte `payload` that the
527// old `to_dynamic` path cloned into a `Dynamic::Bytes` (and then
528// dropped). Every method returns the unit `Ok = ()`; the composite
529// methods return `Self` so nested unwanted structures are likewise
530// drained without building a `Dynamic`. The walk is depth-UNBOUNDED
531// by design: it allocates no per-node memory, so a deep unwanted
532// subtree costs only stack frames inside serde's own bounded derive
533// output — never a `Dynamic` node.
534
535/// No-op serializer: visits and discards, allocates nothing.
536struct NullSerializer;
537
538#[allow(clippy::unused_self)] // serde's by-value receiver; methods are intentionally no-ops
539impl ser::Serializer for NullSerializer {
540    type Ok = ();
541    type Error = DynamicSerError;
542    type SerializeSeq = Self;
543    type SerializeTuple = Self;
544    type SerializeTupleStruct = Self;
545    type SerializeTupleVariant = Self;
546    type SerializeMap = Self;
547    type SerializeStruct = Self;
548    type SerializeStructVariant = Self;
549
550    fn serialize_bool(self, _v: bool) -> DynRes<()> {
551        Ok(())
552    }
553    fn serialize_i8(self, _v: i8) -> DynRes<()> {
554        Ok(())
555    }
556    fn serialize_i16(self, _v: i16) -> DynRes<()> {
557        Ok(())
558    }
559    fn serialize_i32(self, _v: i32) -> DynRes<()> {
560        Ok(())
561    }
562    fn serialize_i64(self, _v: i64) -> DynRes<()> {
563        Ok(())
564    }
565    fn serialize_u8(self, _v: u8) -> DynRes<()> {
566        Ok(())
567    }
568    fn serialize_u16(self, _v: u16) -> DynRes<()> {
569        Ok(())
570    }
571    fn serialize_u32(self, _v: u32) -> DynRes<()> {
572        Ok(())
573    }
574    fn serialize_u64(self, _v: u64) -> DynRes<()> {
575        Ok(())
576    }
577    fn serialize_f32(self, _v: f32) -> DynRes<()> {
578        Ok(())
579    }
580    fn serialize_f64(self, _v: f64) -> DynRes<()> {
581        Ok(())
582    }
583    fn serialize_char(self, _v: char) -> DynRes<()> {
584        Ok(())
585    }
586    fn serialize_str(self, _v: &str) -> DynRes<()> {
587        Ok(())
588    }
589    fn serialize_bytes(self, _v: &[u8]) -> DynRes<()> {
590        Ok(())
591    }
592    fn serialize_none(self) -> DynRes<()> {
593        Ok(())
594    }
595    fn serialize_some<T: ?Sized + Serialize>(self, v: &T) -> DynRes<()> {
596        v.serialize(self)
597    }
598    fn serialize_unit(self) -> DynRes<()> {
599        Ok(())
600    }
601    fn serialize_unit_struct(self, _name: &'static str) -> DynRes<()> {
602        Ok(())
603    }
604    fn serialize_unit_variant(
605        self,
606        _name: &'static str,
607        _variant_index: u32,
608        _variant: &'static str,
609    ) -> DynRes<()> {
610        Ok(())
611    }
612    fn serialize_newtype_struct<T: ?Sized + Serialize>(
613        self,
614        _name: &'static str,
615        v: &T,
616    ) -> DynRes<()> {
617        v.serialize(self)
618    }
619    fn serialize_newtype_variant<T: ?Sized + Serialize>(
620        self,
621        _name: &'static str,
622        _variant_index: u32,
623        _variant: &'static str,
624        v: &T,
625    ) -> DynRes<()> {
626        v.serialize(self)
627    }
628    fn serialize_seq(self, _len: Option<usize>) -> DynRes<Self> {
629        Ok(self)
630    }
631    fn serialize_tuple(self, _len: usize) -> DynRes<Self> {
632        Ok(self)
633    }
634    fn serialize_tuple_struct(self, _name: &'static str, _len: usize) -> DynRes<Self> {
635        Ok(self)
636    }
637    fn serialize_tuple_variant(
638        self,
639        _name: &'static str,
640        _variant_index: u32,
641        _variant: &'static str,
642        _len: usize,
643    ) -> DynRes<Self> {
644        Ok(self)
645    }
646    fn serialize_map(self, _len: Option<usize>) -> DynRes<Self> {
647        Ok(self)
648    }
649    fn serialize_struct(self, _name: &'static str, _len: usize) -> DynRes<Self> {
650        Ok(self)
651    }
652    fn serialize_struct_variant(
653        self,
654        _name: &'static str,
655        _variant_index: u32,
656        _variant: &'static str,
657        _len: usize,
658    ) -> DynRes<Self> {
659        Ok(self)
660    }
661}
662
663impl ser::SerializeSeq for NullSerializer {
664    type Ok = ();
665    type Error = DynamicSerError;
666    fn serialize_element<T: ?Sized + Serialize>(&mut self, value: &T) -> DynRes<()> {
667        value.serialize(NullSerializer)
668    }
669    fn end(self) -> DynRes<()> {
670        Ok(())
671    }
672}
673
674impl ser::SerializeTuple for NullSerializer {
675    type Ok = ();
676    type Error = DynamicSerError;
677    fn serialize_element<T: ?Sized + Serialize>(&mut self, value: &T) -> DynRes<()> {
678        value.serialize(NullSerializer)
679    }
680    fn end(self) -> DynRes<()> {
681        Ok(())
682    }
683}
684
685impl ser::SerializeTupleStruct for NullSerializer {
686    type Ok = ();
687    type Error = DynamicSerError;
688    fn serialize_field<T: ?Sized + Serialize>(&mut self, value: &T) -> DynRes<()> {
689        value.serialize(NullSerializer)
690    }
691    fn end(self) -> DynRes<()> {
692        Ok(())
693    }
694}
695
696impl ser::SerializeTupleVariant for NullSerializer {
697    type Ok = ();
698    type Error = DynamicSerError;
699    fn serialize_field<T: ?Sized + Serialize>(&mut self, value: &T) -> DynRes<()> {
700        value.serialize(NullSerializer)
701    }
702    fn end(self) -> DynRes<()> {
703        Ok(())
704    }
705}
706
707impl ser::SerializeMap for NullSerializer {
708    type Ok = ();
709    type Error = DynamicSerError;
710    fn serialize_key<T: ?Sized + Serialize>(&mut self, key: &T) -> DynRes<()> {
711        key.serialize(NullSerializer)
712    }
713    fn serialize_value<T: ?Sized + Serialize>(&mut self, value: &T) -> DynRes<()> {
714        value.serialize(NullSerializer)
715    }
716    fn end(self) -> DynRes<()> {
717        Ok(())
718    }
719}
720
721impl ser::SerializeStruct for NullSerializer {
722    type Ok = ();
723    type Error = DynamicSerError;
724    fn serialize_field<T: ?Sized + Serialize>(
725        &mut self,
726        _key: &'static str,
727        value: &T,
728    ) -> DynRes<()> {
729        value.serialize(NullSerializer)
730    }
731    fn end(self) -> DynRes<()> {
732        Ok(())
733    }
734}
735
736impl ser::SerializeStructVariant for NullSerializer {
737    type Ok = ();
738    type Error = DynamicSerError;
739    fn serialize_field<T: ?Sized + Serialize>(
740        &mut self,
741        _key: &'static str,
742        value: &T,
743    ) -> DynRes<()> {
744        value.serialize(NullSerializer)
745    }
746    fn end(self) -> DynRes<()> {
747        Ok(())
748    }
749}
750
751// ---------- DynamicSerializer ------------------------------------
752
/// Error type shared by the reflection serializers in this module.
///
/// Every serde method returns `Result<_, Self::Error>`, so the type
/// lives next to the serializers. It owns its message as a `String`,
/// which lets it carry text produced at runtime.
#[derive(Debug)]
struct DynamicSerError(String);

impl std::fmt::Display for DynamicSerError {
    /// Writes the stored message verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(message) = self;
        f.write_str(message)
    }
}

impl std::error::Error for DynamicSerError {}
768
769impl ser::Error for DynamicSerError {
770    fn custom<T: std::fmt::Display>(msg: T) -> Self {
771        Self(msg.to_string())
772    }
773}
774
775impl From<DynamicSerError> for Error {
776    fn from(_e: DynamicSerError) -> Self {
777        // The serializer's errors are programmer-bug paths in the
778        // `Document` impl (non-stringable map keys, depth overflow,
779        // a user `Serialize` impl returning a custom error). All
780        // collapse to a single static `Error::InvalidArgument` so
781        // we do not need to grow `Error` with an owned-string
782        // variant just for this path. The serializer's `Display`
783        // body keeps the per-instance message available for logs
784        // if a caller `Debug`-prints the wrapped error.
785        Error::InvalidArgument(
786            "index extraction: Document Serialize impl rejected reflection \
787             (see DynamicSerError for the detail)",
788        )
789    }
790}
791
792/// Serde `Serializer` that converts the visited value into a
793/// `Dynamic` tree. Bounded-depth (Rule 1) via the `depth` counter.
794struct DynamicSerializer {
795    depth: usize,
796}
797
798impl DynamicSerializer {
799    fn deeper(&self) -> Result<Self> {
800        let next = self
801            .depth
802            .checked_add(1)
803            .ok_or(Error::InvalidArgument("index extraction: depth overflow"))?;
804        if next >= MAX_REFLECT_DEPTH {
805            return Err(Error::InvalidArgument(
806                "index extraction: max reflection depth exceeded",
807            ));
808        }
809        Ok(Self { depth: next })
810    }
811}
812
813/// Local helper for serde plumbing: a `Result<Dynamic,
814/// DynamicSerError>` alias.
815type DynRes<T = Dynamic> = std::result::Result<T, DynamicSerError>;
816
817#[allow(clippy::cast_possible_truncation)] // i128 → i64 / u128 → u64 narrowing is the encoded form
818#[allow(clippy::cast_sign_loss)] // sign loss on the unsigned-only `serialize_u*` is intentional
819impl ser::Serializer for DynamicSerializer {
820    type Ok = Dynamic;
821    type Error = DynamicSerError;
822    type SerializeSeq = SeqBuilder;
823    type SerializeTuple = SeqBuilder;
824    type SerializeTupleStruct = SeqBuilder;
825    type SerializeTupleVariant = SeqBuilder;
826    type SerializeMap = MapBuilder;
827    type SerializeStruct = MapBuilder;
828    type SerializeStructVariant = MapBuilder;
829
830    fn serialize_bool(self, v: bool) -> DynRes {
831        Ok(Dynamic::Bool(v))
832    }
833    fn serialize_i8(self, v: i8) -> DynRes {
834        Ok(Dynamic::I64(i64::from(v)))
835    }
836    fn serialize_i16(self, v: i16) -> DynRes {
837        Ok(Dynamic::I64(i64::from(v)))
838    }
839    fn serialize_i32(self, v: i32) -> DynRes {
840        Ok(Dynamic::I64(i64::from(v)))
841    }
842    fn serialize_i64(self, v: i64) -> DynRes {
843        Ok(Dynamic::I64(v))
844    }
845    fn serialize_i128(self, v: i128) -> DynRes {
846        Ok(Dynamic::I64(v as i64))
847    }
848    fn serialize_u8(self, v: u8) -> DynRes {
849        Ok(Dynamic::U64(u64::from(v)))
850    }
851    fn serialize_u16(self, v: u16) -> DynRes {
852        Ok(Dynamic::U64(u64::from(v)))
853    }
854    fn serialize_u32(self, v: u32) -> DynRes {
855        Ok(Dynamic::U64(u64::from(v)))
856    }
857    fn serialize_u64(self, v: u64) -> DynRes {
858        Ok(Dynamic::U64(v))
859    }
860    fn serialize_u128(self, v: u128) -> DynRes {
861        Ok(Dynamic::U64(v as u64))
862    }
863    fn serialize_f32(self, v: f32) -> DynRes {
864        Ok(Dynamic::F64(f64::from(v)))
865    }
866    fn serialize_f64(self, v: f64) -> DynRes {
867        Ok(Dynamic::F64(v))
868    }
869    fn serialize_char(self, v: char) -> DynRes {
870        Ok(Dynamic::String(v.to_string()))
871    }
872    fn serialize_str(self, v: &str) -> DynRes {
873        Ok(Dynamic::String(v.to_owned()))
874    }
875    fn serialize_bytes(self, v: &[u8]) -> DynRes {
876        Ok(Dynamic::Bytes(v.to_vec()))
877    }
878    fn serialize_none(self) -> DynRes {
879        Ok(Dynamic::Null)
880    }
881    fn serialize_some<T: ?Sized + Serialize>(self, v: &T) -> DynRes {
882        v.serialize(self)
883    }
884    fn serialize_unit(self) -> DynRes {
885        Ok(Dynamic::Null)
886    }
887    fn serialize_unit_struct(self, _name: &'static str) -> DynRes {
888        Ok(Dynamic::Null)
889    }
890    fn serialize_unit_variant(
891        self,
892        _name: &'static str,
893        _variant_index: u32,
894        variant: &'static str,
895    ) -> DynRes {
896        Ok(Dynamic::String(variant.to_owned()))
897    }
898    fn serialize_newtype_struct<T: ?Sized + Serialize>(self, _name: &'static str, v: &T) -> DynRes {
899        v.serialize(self)
900    }
901    fn serialize_newtype_variant<T: ?Sized + Serialize>(
902        self,
903        _name: &'static str,
904        _variant_index: u32,
905        variant: &'static str,
906        v: &T,
907    ) -> DynRes {
908        let inner = v.serialize(deeper(&self)?)?;
909        let mut m = BTreeMap::new();
910        m.insert(variant.to_owned(), inner);
911        Ok(Dynamic::Map(m))
912    }
913    fn serialize_seq(self, len: Option<usize>) -> DynRes<SeqBuilder> {
914        let cap = len.unwrap_or(0).min(MAX_EACH_ENTRIES);
915        Ok(SeqBuilder {
916            depth: self.depth,
917            items: Vec::with_capacity(cap),
918        })
919    }
920    fn serialize_tuple(self, len: usize) -> DynRes<SeqBuilder> {
921        self.serialize_seq(Some(len))
922    }
923    fn serialize_tuple_struct(self, _name: &'static str, len: usize) -> DynRes<SeqBuilder> {
924        self.serialize_seq(Some(len))
925    }
926    fn serialize_tuple_variant(
927        self,
928        _name: &'static str,
929        _variant_index: u32,
930        _variant: &'static str,
931        len: usize,
932    ) -> DynRes<SeqBuilder> {
933        self.serialize_seq(Some(len))
934    }
935    fn serialize_map(self, _len: Option<usize>) -> DynRes<MapBuilder> {
936        Ok(MapBuilder {
937            depth: self.depth,
938            map: BTreeMap::new(),
939            pending_key: None,
940        })
941    }
942    fn serialize_struct(self, _name: &'static str, _len: usize) -> DynRes<MapBuilder> {
943        self.serialize_map(None)
944    }
945    fn serialize_struct_variant(
946        self,
947        _name: &'static str,
948        _variant_index: u32,
949        _variant: &'static str,
950        _len: usize,
951    ) -> DynRes<MapBuilder> {
952        self.serialize_map(None)
953    }
954}
955
956/// Construct a deeper [`DynamicSerializer`] for nested entries; the
957/// result mirrors `self.depth + 1` and trips
958/// [`MAX_REFLECT_DEPTH`].
959fn deeper(s: &DynamicSerializer) -> DynRes<DynamicSerializer> {
960    s.deeper().map_err(|e| {
961        DynamicSerError(match e {
962            Error::InvalidArgument(msg) => msg.to_owned(),
963            other => other.to_string(),
964        })
965    })
966}
967
968/// Builder for sequence / tuple / tuple-struct / tuple-variant
969/// shapes. Each `serialize_element` recurses into a deeper
970/// `DynamicSerializer`.
971struct SeqBuilder {
972    depth: usize,
973    items: Vec<Dynamic>,
974}
975
976impl ser::SerializeSeq for SeqBuilder {
977    type Ok = Dynamic;
978    type Error = DynamicSerError;
979    fn serialize_element<T: ?Sized + Serialize>(&mut self, value: &T) -> DynRes<()> {
980        if self.items.len() >= MAX_EACH_ENTRIES {
981            return Err(DynamicSerError(
982                "index extraction: sequence exceeds MAX_EACH_ENTRIES".to_owned(),
983            ));
984        }
985        let item = value.serialize(DynamicSerializer {
986            depth: self.depth + 1,
987        })?;
988        self.items.push(item);
989        Ok(())
990    }
991    fn end(self) -> DynRes {
992        Ok(Dynamic::Seq(self.items))
993    }
994}
995
996impl ser::SerializeTuple for SeqBuilder {
997    type Ok = Dynamic;
998    type Error = DynamicSerError;
999    fn serialize_element<T: ?Sized + Serialize>(&mut self, value: &T) -> DynRes<()> {
1000        <Self as ser::SerializeSeq>::serialize_element(self, value)
1001    }
1002    fn end(self) -> DynRes {
1003        <Self as ser::SerializeSeq>::end(self)
1004    }
1005}
1006
1007impl ser::SerializeTupleStruct for SeqBuilder {
1008    type Ok = Dynamic;
1009    type Error = DynamicSerError;
1010    fn serialize_field<T: ?Sized + Serialize>(&mut self, value: &T) -> DynRes<()> {
1011        <Self as ser::SerializeSeq>::serialize_element(self, value)
1012    }
1013    fn end(self) -> DynRes {
1014        <Self as ser::SerializeSeq>::end(self)
1015    }
1016}
1017
1018impl ser::SerializeTupleVariant for SeqBuilder {
1019    type Ok = Dynamic;
1020    type Error = DynamicSerError;
1021    fn serialize_field<T: ?Sized + Serialize>(&mut self, value: &T) -> DynRes<()> {
1022        <Self as ser::SerializeSeq>::serialize_element(self, value)
1023    }
1024    fn end(self) -> DynRes {
1025        <Self as ser::SerializeSeq>::end(self)
1026    }
1027}
1028
1029/// Builder for map / struct / struct-variant shapes. Uses
1030/// `pending_key` to handle the `serialize_key` / `serialize_value`
1031/// pairing required by `SerializeMap`.
1032struct MapBuilder {
1033    depth: usize,
1034    map: BTreeMap<String, Dynamic>,
1035    pending_key: Option<String>,
1036}
1037
1038impl MapBuilder {
1039    fn deeper_serializer(&self) -> DynamicSerializer {
1040        DynamicSerializer {
1041            depth: self.depth + 1,
1042        }
1043    }
1044}
1045
1046impl ser::SerializeMap for MapBuilder {
1047    type Ok = Dynamic;
1048    type Error = DynamicSerError;
1049    fn serialize_key<T: ?Sized + Serialize>(&mut self, key: &T) -> DynRes<()> {
1050        let key_dyn = key.serialize(self.deeper_serializer())?;
1051        let key_string = match key_dyn {
1052            Dynamic::String(s) => s,
1053            Dynamic::U64(n) => n.to_string(),
1054            Dynamic::I64(n) => n.to_string(),
1055            Dynamic::Bool(b) => b.to_string(),
1056            other => {
1057                return Err(DynamicSerError(format!(
1058                    "map key must be stringable (got {other:?})"
1059                )));
1060            }
1061        };
1062        self.pending_key = Some(key_string);
1063        Ok(())
1064    }
1065    fn serialize_value<T: ?Sized + Serialize>(&mut self, value: &T) -> DynRes<()> {
1066        let key = self
1067            .pending_key
1068            .take()
1069            .ok_or_else(|| DynamicSerError("map value without preceding key".to_owned()))?;
1070        let val = value.serialize(self.deeper_serializer())?;
1071        self.map.insert(key, val);
1072        Ok(())
1073    }
1074    fn end(self) -> DynRes {
1075        Ok(Dynamic::Map(self.map))
1076    }
1077}
1078
1079impl ser::SerializeStruct for MapBuilder {
1080    type Ok = Dynamic;
1081    type Error = DynamicSerError;
1082    fn serialize_field<T: ?Sized + Serialize>(
1083        &mut self,
1084        key: &'static str,
1085        value: &T,
1086    ) -> DynRes<()> {
1087        let val = value.serialize(self.deeper_serializer())?;
1088        self.map.insert(key.to_owned(), val);
1089        Ok(())
1090    }
1091    fn end(self) -> DynRes {
1092        Ok(Dynamic::Map(self.map))
1093    }
1094}
1095
1096impl ser::SerializeStructVariant for MapBuilder {
1097    type Ok = Dynamic;
1098    type Error = DynamicSerError;
1099    fn serialize_field<T: ?Sized + Serialize>(
1100        &mut self,
1101        key: &'static str,
1102        value: &T,
1103    ) -> DynRes<()> {
1104        <Self as ser::SerializeStruct>::serialize_field(self, key, value)
1105    }
1106    fn end(self) -> DynRes {
1107        <Self as ser::SerializeStruct>::end(self)
1108    }
1109}
1110
1111#[cfg(test)]
1112mod tests {
1113    use super::*;
1114    use serde::{Deserialize, Serialize};
1115
1116    #[derive(Debug, Serialize, Deserialize)]
1117    struct Customer {
1118        email: String,
1119        score: i64,
1120        tags: Vec<String>,
1121    }
1122
1123    impl Document for Customer {
1124        const COLLECTION: &'static str = "customers";
1125        const VERSION: u32 = 1;
1126    }
1127
1128    #[derive(Debug, Serialize, Deserialize)]
1129    struct Order {
1130        customer_id: u64,
1131        placed_at: u64,
1132        amount_cents: i64,
1133    }
1134
1135    impl Document for Order {
1136        const COLLECTION: &'static str = "orders";
1137        const VERSION: u32 = 1;
1138    }
1139
1140    #[test]
1141    fn dynamic_reflection_of_simple_struct() {
1142        let c = Customer {
1143            email: "ada@example.com".to_owned(),
1144            score: 42,
1145            tags: vec!["alpha".to_owned(), "beta".to_owned()],
1146        };
1147        let d = to_dynamic(&c).expect("reflect");
1148        let Dynamic::Map(map) = &d else {
1149            panic!("expected Map, got {d:?}");
1150        };
1151        assert_eq!(
1152            map.get("email"),
1153            Some(&Dynamic::String("ada@example.com".to_owned())),
1154        );
1155        assert_eq!(map.get("score"), Some(&Dynamic::I64(42)));
1156        match map.get("tags") {
1157            Some(Dynamic::Seq(items)) => assert_eq!(items.len(), 2),
1158            other => panic!("expected Seq, got {other:?}"),
1159        }
1160    }
1161
1162    #[test]
1163    fn standard_extract_returns_one_key() {
1164        let c = Customer {
1165            email: "ada@example.com".to_owned(),
1166            score: 7,
1167            tags: vec![],
1168        };
1169        let spec = IndexSpec::standard("by_email", "email").expect("spec");
1170        let keys = extract_index_keys(Customer::COLLECTION, &spec, &c).expect("extract");
1171        assert_eq!(keys.len(), 1);
1172        // Encoding should be the order-preserving String form —
1173        // double-check by encoding the same Dynamic directly.
1174        let expected = encode_field(&Dynamic::String("ada@example.com".to_owned())).expect("enc");
1175        assert_eq!(keys[0], expected);
1176    }
1177
1178    #[test]
1179    fn unique_extract_returns_one_key() {
1180        let c = Customer {
1181            email: "u@e.com".to_owned(),
1182            score: 1,
1183            tags: vec![],
1184        };
1185        let spec = IndexSpec::unique("by_email", "email").expect("spec");
1186        let keys = extract_index_keys(Customer::COLLECTION, &spec, &c).expect("extract");
1187        assert_eq!(keys.len(), 1);
1188    }
1189
1190    #[test]
1191    fn each_extract_returns_n_keys() {
1192        let c = Customer {
1193            email: "x".to_owned(),
1194            score: 0,
1195            tags: vec!["a".to_owned(), "b".to_owned(), "c".to_owned()],
1196        };
1197        let spec = IndexSpec::each("by_tag", "tags").expect("spec");
1198        let keys = extract_index_keys(Customer::COLLECTION, &spec, &c).expect("extract");
1199        assert_eq!(keys.len(), 3);
1200        // Each entry is the encoded String of one element.
1201        let want_a = encode_field(&Dynamic::String("a".to_owned())).expect("enc");
1202        assert_eq!(keys[0], want_a);
1203    }
1204
1205    #[test]
1206    fn each_extract_on_empty_seq_returns_empty_vec() {
1207        let c = Customer {
1208            email: "x".to_owned(),
1209            score: 0,
1210            tags: vec![],
1211        };
1212        let spec = IndexSpec::each("by_tag", "tags").expect("spec");
1213        let keys = extract_index_keys(Customer::COLLECTION, &spec, &c).expect("extract");
1214        assert!(keys.is_empty());
1215    }
1216
1217    #[test]
1218    fn composite_extract_returns_one_envelope_key() {
1219        let o = Order {
1220            customer_id: 7,
1221            placed_at: 12_345,
1222            amount_cents: 100,
1223        };
1224        let spec = IndexSpec::composite("by_ct", &["customer_id", "placed_at"]).expect("spec");
1225        let keys = extract_index_keys(Order::COLLECTION, &spec, &o).expect("extract");
1226        assert_eq!(keys.len(), 1);
1227        // Envelope tag must be the documented value.
1228        assert_eq!(keys[0].as_bytes()[0], crate::index::key::COMPOSITE_TAG);
1229    }
1230
1231    #[test]
1232    fn missing_field_path_errors() {
1233        let c = Customer {
1234            email: "x".to_owned(),
1235            score: 0,
1236            tags: vec![],
1237        };
1238        let spec = IndexSpec::standard("by_nope", "nope").expect("spec");
1239        let err = extract_index_keys(Customer::COLLECTION, &spec, &c).expect_err("missing");
1240        match err {
1241            Error::IndexFieldMissing {
1242                collection,
1243                index,
1244                path,
1245            } => {
1246                assert_eq!(collection, "customers");
1247                assert_eq!(index, "by_nope");
1248                assert_eq!(path, "nope");
1249            }
1250            other => panic!("expected IndexFieldMissing, got {other:?}"),
1251        }
1252    }
1253
1254    #[test]
1255    fn each_on_non_seq_field_errors() {
1256        let c = Customer {
1257            email: "x".to_owned(),
1258            score: 99,
1259            tags: vec![],
1260        };
1261        // `score` is an i64, not a Vec — Each must reject.
1262        let spec = IndexSpec::each("by_score", "score").expect("spec");
1263        let err = extract_index_keys(Customer::COLLECTION, &spec, &c).expect_err("type");
1264        match err {
1265            Error::IndexFieldTypeMismatch {
1266                collection,
1267                index,
1268                path,
1269                expected,
1270                found,
1271            } => {
1272                assert_eq!(collection, "customers");
1273                assert_eq!(index, "by_score");
1274                assert_eq!(path, "score");
1275                assert_eq!(expected, "Seq");
1276                assert_eq!(found, "I64");
1277            }
1278            other => panic!("expected IndexFieldTypeMismatch, got {other:?}"),
1279        }
1280    }
1281
1282    #[test]
1283    fn composite_decode_round_trip_matches_direct_encoding() {
1284        let o = Order {
1285            customer_id: 7,
1286            placed_at: 12_345,
1287            amount_cents: 100,
1288        };
1289        let spec = IndexSpec::composite("by_ct", &["customer_id", "placed_at"]).expect("spec");
1290        let extracted = extract_index_keys(Order::COLLECTION, &spec, &o).expect("extract");
1291        // Manually build the same composite by reflecting + encoding.
1292        let direct = encode_index_key(
1293            &spec,
1294            &[Dynamic::U64(o.customer_id), Dynamic::U64(o.placed_at)],
1295        )
1296        .expect("direct");
1297        assert_eq!(extracted[0], direct);
1298    }
1299
1300    // ---------- #92 byte-identity guardrail ----------------------
1301    //
1302    // The projecting path (`extract_index_keys`, which now routes
1303    // Standard / Unique / Composite / Each through `FieldProjector`)
1304    // MUST yield an `EncodedIndexKey` byte-identical to the legacy
1305    // full-document path (`to_dynamic` + `lookup_field` + encode). The
1306    // on-disk index-key format must not move. `legacy_extract_*`
1307    // reproduces the pre-#92 logic verbatim using the still-present
1308    // `to_dynamic`, then every shape asserts equality against the new
1309    // path.
1310
1311    /// Legacy scalar/each extraction: full-doc reflect → `get` →
1312    /// encode, exactly as the code did before #92.
1313    fn legacy_extract<T: Document>(
1314        collection: &str,
1315        spec: &IndexSpec,
1316        doc: &T,
1317    ) -> Result<Vec<EncodedIndexKey>> {
1318        let dynamic = to_dynamic(doc)?;
1319        let lookup = |path: &str| -> Result<Dynamic> {
1320            dynamic
1321                .get(path)
1322                .cloned()
1323                .ok_or_else(|| Error::IndexFieldMissing {
1324                    collection: collection.to_owned(),
1325                    index: spec.name.clone(),
1326                    path: path.to_owned(),
1327                })
1328        };
1329        match spec.kind {
1330            IndexKind::Standard | IndexKind::Unique => {
1331                let v = lookup(&spec.key_paths[0])?;
1332                Ok(vec![encode_index_key(spec, std::slice::from_ref(&v))?])
1333            }
1334            IndexKind::Each => {
1335                let v = lookup(&spec.key_paths[0])?;
1336                let Dynamic::Seq(items) = v else {
1337                    panic!("legacy each on non-seq");
1338                };
1339                items.iter().map(encode_field).collect()
1340            }
1341            IndexKind::Composite => {
1342                let mut fields = Vec::new();
1343                for p in &spec.key_paths {
1344                    fields.push(lookup(p)?);
1345                }
1346                Ok(vec![encode_index_key(spec, &fields)?])
1347            }
1348        }
1349    }
1350
1351    /// Every indexable field shape, in a single doc, so one struct
1352    /// exercises u64 / i64 (sign split) / String / bool / Option
1353    /// (Some + None) / newtype / enum / f64 / bytes for Standard,
1354    /// Unique, and Composite.
1355    #[derive(Debug, Serialize, Deserialize)]
1356    struct Shapes {
1357        u: u64,
1358        i_pos: i64,
1359        i_neg: i64,
1360        i_zero: i64,
1361        s: String,
1362        flag: bool,
1363        opt_some: Option<u64>,
1364        opt_none: Option<u64>,
1365        nt: Newtype,
1366        en: Color,
1367        f: f64,
1368        b: Bytes,
1369        payload: Vec<u8>, // the unindexed "big" field #92 must skip
1370    }
1371
1372    #[derive(Debug, Serialize, Deserialize)]
1373    struct Newtype(i64);
1374
1375    #[derive(Debug, Serialize, Deserialize)]
1376    enum Color {
1377        Red,
1378        Green,
1379    }
1380
1381    /// A bytes-valued field. Hand-written `Serialize` so it drives
1382    /// `serialize_bytes` (→ `Dynamic::Bytes`) rather than a per-element
1383    /// seq — the shape the encoder's `TAG_BYTES` arm consumes.
1384    #[derive(Debug)]
1385    struct Bytes(Vec<u8>);
1386
1387    impl Serialize for Bytes {
1388        fn serialize<S: serde::Serializer>(&self, ser: S) -> std::result::Result<S::Ok, S::Error> {
1389            ser.serialize_bytes(&self.0)
1390        }
1391    }
1392
1393    impl<'de> Deserialize<'de> for Bytes {
1394        fn deserialize<D: serde::Deserializer<'de>>(de: D) -> std::result::Result<Self, D::Error> {
1395            // Only `Serialize` is exercised by extraction; a permissive
1396            // byte-buf deserialize keeps `Shapes: DeserializeOwned`
1397            // (the `Document` bound) satisfied.
1398            let v = <Vec<u8>>::deserialize(de)?;
1399            Ok(Bytes(v))
1400        }
1401    }
1402
1403    impl Document for Shapes {
1404        const COLLECTION: &'static str = "shapes";
1405        const VERSION: u32 = 1;
1406    }
1407
1408    fn sample_shapes() -> Shapes {
1409        Shapes {
1410            u: 7,
1411            i_pos: 42,
1412            i_neg: -42,
1413            i_zero: 0,
1414            s: "hello".to_owned(),
1415            flag: true,
1416            opt_some: Some(99),
1417            opt_none: None,
1418            nt: Newtype(-1),
1419            en: Color::Green,
1420            f: -1.5,
1421            b: Bytes(vec![0x00, 0x01, 0xFF]),
1422            payload: vec![0xAB; 480],
1423        }
1424    }
1425
1426    /// Assert the projecting path is byte-identical to the legacy
1427    /// full-doc path for `spec` against `doc`.
1428    fn assert_byte_identical<T: Document>(spec: &IndexSpec, doc: &T) {
1429        let new = extract_index_keys(T::COLLECTION, spec, doc).expect("project extract");
1430        let old = legacy_extract(T::COLLECTION, spec, doc).expect("legacy extract");
1431        assert_eq!(
1432            new, old,
1433            "byte mismatch for spec={spec:?}: projecting path diverged from full-doc path"
1434        );
1435    }
1436
1437    #[test]
1438    fn project_standard_unique_byte_identical_to_full_doc() {
1439        let doc = sample_shapes();
1440        // One Standard + one Unique spec per scalar field shape.
1441        for field in [
1442            "u", "i_pos", "i_neg", "i_zero", "s", "flag", "opt_some", "opt_none", "nt", "en", "f",
1443            "b",
1444        ] {
1445            let std = IndexSpec::standard("ix", field).expect("standard spec");
1446            assert_byte_identical(&std, &doc);
1447            let uniq = IndexSpec::unique("ix", field).expect("unique spec");
1448            assert_byte_identical(&uniq, &doc);
1449        }
1450    }
1451
1452    #[test]
1453    fn project_composite_byte_identical_to_full_doc() {
1454        let doc = sample_shapes();
1455        // Multi-field composites spanning the variant zoo.
1456        let cases: &[&[&str]] = &[
1457            &["u", "i_neg"],
1458            &["s", "flag"],
1459            &["i_pos", "i_zero", "u"],
1460            &["opt_some", "opt_none"],
1461            &["nt", "en", "f"],
1462            &["b", "u", "s"],
1463        ];
1464        for paths in cases {
1465            let spec = IndexSpec::composite("ix", paths).expect("composite spec");
1466            assert_byte_identical(&spec, &doc);
1467        }
1468    }
1469
1470    #[test]
1471    fn project_each_byte_identical_to_full_doc() {
1472        // `Each` over a Vec must still produce the per-element keys
1473        // identical to the full-doc path.
1474        let c = Customer {
1475            email: "x".to_owned(),
1476            score: 0,
1477            tags: vec!["a".to_owned(), "bb".to_owned(), "ccc".to_owned()],
1478        };
1479        let spec = IndexSpec::each("by_tag", "tags").expect("each spec");
1480        assert_byte_identical(&spec, &c);
1481        // Empty seq → empty key set on both paths.
1482        let empty = Customer {
1483            email: "x".to_owned(),
1484            score: 0,
1485            tags: vec![],
1486        };
1487        assert_byte_identical(&spec, &empty);
1488    }
1489
1490    #[test]
1491    fn project_missing_field_errors_identically() {
1492        let doc = sample_shapes();
1493        let spec = IndexSpec::standard("by_nope", "nope").expect("spec");
1494        let new = extract_index_keys(Shapes::COLLECTION, &spec, &doc).expect_err("missing");
1495        let old = legacy_extract(Shapes::COLLECTION, &spec, &doc).expect_err("legacy missing");
1496        // Same variant + same fields.
1497        assert!(matches!(new, Error::IndexFieldMissing { .. }));
1498        assert_eq!(format!("{new:?}"), format!("{old:?}"));
1499    }
1500
1501    #[test]
1502    fn project_each_on_non_seq_errors_identically() {
1503        let doc = sample_shapes();
1504        // `u` is a u64, not a Vec — Each must reject with a type
1505        // mismatch, identical to the full-doc path's `found`.
1506        let spec = IndexSpec::each("by_u", "u").expect("spec");
1507        let new = extract_index_keys(Shapes::COLLECTION, &spec, &doc).expect_err("type");
1508        match new {
1509            Error::IndexFieldTypeMismatch {
1510                ref path,
1511                expected,
1512                found,
1513                ..
1514            } => {
1515                assert_eq!(path, "u");
1516                assert_eq!(expected, "Seq");
1517                assert_eq!(found, "U64");
1518            }
1519            other => panic!("expected IndexFieldTypeMismatch, got {other:?}"),
1520        }
1521    }
1522}