Skip to main content

shape_runtime/type_schema/
registry.rs

1//! Type schema registry and builder
2//!
3//! This module provides the shared registry for type schemas and a fluent
4//! builder API for creating schemas.
5
6use super::SchemaId;
7use super::enum_support::EnumVariantInfo;
8use super::field_types::{FieldAnnotation, FieldType};
9use super::schema::TypeSchema;
10use std::collections::HashMap;
11use std::sync::RwLock;
12use std::sync::atomic::{AtomicU32, Ordering};
13
14/// Starting value for per-registry schema ID counters.
15///
16/// Matches the historical `NEXT_SCHEMA_ID` static seed so registries created
17/// via [`TypeSchemaRegistry::new_with_stdlib`] use the same ID domain that the
18/// process-wide static has always used.
19const INITIAL_SCHEMA_ID: SchemaId = 1;
20
21/// Registry of type schemas.
22///
23/// Each registry owns its own schema-ID counter via `next_id`. This is the
24/// per-`Runtime` replacement for the legacy process-global `NEXT_SCHEMA_ID`
25/// static: two registries built with [`TypeSchemaRegistry::new_with_stdlib`]
26/// assign IDs from their own domains and do not observe each other's state.
27///
28/// The counter is not currently consulted by the historic [`TypeSchema::new`]
29/// path (which still bumps the global static), but can be allocated via
30/// [`TypeSchemaRegistry::allocate_id`] and used with
31/// [`TypeSchema::with_id`]. During the B1 migration window both paths coexist.
32#[derive(Debug, serde::Serialize, serde::Deserialize)]
33pub struct TypeSchemaRegistry {
34    /// Per-registry counter for allocating fresh schema IDs.
35    ///
36    /// Skipped during (de)serialization; a decoded registry restarts its
37    /// counter above the maximum observed ID via the custom `Deserialize`
38    /// impl. This matches historical behaviour where the global static was
39    /// bumped via `ensure_next_schema_id_above`.
40    #[serde(skip, default = "default_next_id")]
41    next_id: AtomicU32,
42    /// Schemas indexed by name
43    by_name: HashMap<String, TypeSchema>,
44    /// Schemas indexed by ID for fast runtime lookup
45    by_id: HashMap<SchemaId, String>,
46    /// Predeclared schemas keyed by ordered field-name signature.
47    ///
48    /// Populated by [`Self::register_predeclared_any_schema`] when
49    /// compile-time tooling, extensions, or comptime paths derive a
50    /// TypedObject layout that is not backed by a named type.
51    /// Moved onto the registry in B1.6 (previously a process-global
52    /// `PREDECLARED_SCHEMA_CACHE` static).
53    #[serde(skip, default)]
54    predeclared_cache: RwLock<HashMap<String, SchemaId>>,
55    /// Predeclared schemas indexed by schema ID. B1.6 migrated this off
56    /// the legacy `PREDECLARED_SCHEMA_REGISTRY` static.
57    #[serde(skip, default)]
58    predeclared_by_id: RwLock<HashMap<SchemaId, TypeSchema>>,
59}
60
61fn default_next_id() -> AtomicU32 {
62    AtomicU32::new(INITIAL_SCHEMA_ID)
63}
64
65impl Default for TypeSchemaRegistry {
66    fn default() -> Self {
67        Self {
68            next_id: default_next_id(),
69            by_name: HashMap::new(),
70            by_id: HashMap::new(),
71            predeclared_cache: RwLock::new(HashMap::new()),
72            predeclared_by_id: RwLock::new(HashMap::new()),
73        }
74    }
75}
76
77impl Clone for TypeSchemaRegistry {
78    fn clone(&self) -> Self {
79        let predeclared_cache = self
80            .predeclared_cache
81            .read()
82            .map(|g| g.clone())
83            .unwrap_or_default();
84        let predeclared_by_id = self
85            .predeclared_by_id
86            .read()
87            .map(|g| g.clone())
88            .unwrap_or_default();
89        Self {
90            next_id: AtomicU32::new(self.next_id.load(Ordering::SeqCst)),
91            by_name: self.by_name.clone(),
92            by_id: self.by_id.clone(),
93            predeclared_cache: RwLock::new(predeclared_cache),
94            predeclared_by_id: RwLock::new(predeclared_by_id),
95        }
96    }
97}
98
99impl TypeSchemaRegistry {
100    /// Create a new empty registry
101    pub fn new() -> Self {
102        Self::default()
103    }
104
105    /// Allocate a fresh schema ID from this registry's per-instance counter.
106    ///
107    /// IDs allocated via this method are independent of the legacy
108    /// process-global `NEXT_SCHEMA_ID` static. Used together with
109    /// [`TypeSchema::with_id`] to construct schemas whose IDs are isolated per
110    /// registry (and therefore per `Runtime`).
111    pub fn allocate_id(&self) -> SchemaId {
112        self.next_id.fetch_add(1, Ordering::SeqCst)
113    }
114
115    /// Ensure all future allocations from this registry yield IDs strictly
116    /// greater than `max_existing_id`.
117    ///
118    /// Used after loading externally compiled bytecode whose schemas already
119    /// have assigned IDs — mirrors the legacy
120    /// `ensure_next_schema_id_above` helper at a per-registry scope.
121    pub fn ensure_next_id_above(&self, max_existing_id: SchemaId) {
122        let required_next = max_existing_id.saturating_add(1);
123        let mut current = self.next_id.load(Ordering::SeqCst);
124        while current < required_next {
125            match self.next_id.compare_exchange(
126                current,
127                required_next,
128                Ordering::SeqCst,
129                Ordering::SeqCst,
130            ) {
131                Ok(_) => break,
132                Err(actual) => current = actual,
133            }
134        }
135    }
136
137    /// Peek the next ID that [`allocate_id`](Self::allocate_id) would produce
138    /// without incrementing the counter. For tests/introspection only.
139    #[cfg(test)]
140    pub(crate) fn peek_next_id(&self) -> SchemaId {
141        self.next_id.load(Ordering::SeqCst)
142    }
143
144    /// Register a type schema
145    pub fn register(&mut self, schema: TypeSchema) {
146        let name = schema.name.clone();
147        let id = schema.id;
148        self.by_id.insert(id, name.clone());
149        self.by_name.insert(name, schema);
150    }
151
152    /// Register a type with field definitions
153    pub fn register_type(
154        &mut self,
155        name: impl Into<String>,
156        fields: Vec<(String, FieldType)>,
157    ) -> SchemaId {
158        let schema = TypeSchema::new(name, fields);
159        let id = schema.id;
160        self.register(schema);
161        id
162    }
163
164    /// Register a type with field definitions and per-field annotations.
165    ///
166    /// Each entry in `field_annotations` corresponds to the field at the same
167    /// index in `fields`. Annotations such as `@alias("wire_name")` are stored
168    /// on the resulting `FieldDef` so that serialization and deserialization
169    /// boundaries can use `wire_name()` instead of the field name.
170    pub fn register_type_with_annotations(
171        &mut self,
172        name: impl Into<String>,
173        fields: Vec<(String, FieldType)>,
174        field_annotations: Vec<Vec<FieldAnnotation>>,
175    ) -> SchemaId {
176        let mut schema = TypeSchema::new(name, fields);
177        for (i, annotations) in field_annotations.into_iter().enumerate() {
178            if i < schema.fields.len() && !annotations.is_empty() {
179                schema.fields[i].annotations = annotations;
180            }
181        }
182        let id = schema.id;
183        self.register(schema);
184        id
185    }
186
187    /// Get schema by name
188    pub fn get(&self, name: &str) -> Option<&TypeSchema> {
189        self.by_name.get(name)
190    }
191
192    /// Get schema by ID
193    pub fn get_by_id(&self, id: SchemaId) -> Option<&TypeSchema> {
194        self.by_id.get(&id).and_then(|name| self.by_name.get(name))
195    }
196
197    /// Highest schema ID currently stored in this registry.
198    pub fn max_schema_id(&self) -> Option<SchemaId> {
199        self.by_id.keys().copied().max()
200    }
201
202    /// Get field offset for a type/field combination
203    pub fn field_offset(&self, type_name: &str, field_name: &str) -> Option<usize> {
204        self.get(type_name)?.field_offset(field_name)
205    }
206
207    /// Check if a type is registered
208    pub fn has_type(&self, name: &str) -> bool {
209        self.by_name.contains_key(name)
210    }
211
212    /// Number of registered types
213    pub fn type_count(&self) -> usize {
214        self.by_name.len()
215    }
216
217    /// Iterator over all registered type names
218    pub fn type_names(&self) -> impl Iterator<Item = &str> {
219        self.by_name.keys().map(|s| s.as_str())
220    }
221
222    /// Create a registry with common stdlib types pre-registered.
223    ///
224    /// Since B1.7 all registrations draw their IDs from the registry'''s
225    /// per-instance counter — no process-global or ambient counter is
226    /// consulted. This keeps two independently constructed registries
227    /// isolated.
228    pub fn with_stdlib_types() -> Self {
229        let mut registry = Self::new();
230
231        // Register Row type (generic data row).
232        registry.register_type_scoped(
233            "Row",
234            vec![
235                ("timestamp".to_string(), FieldType::Timestamp),
236                ("fields".to_string(), FieldType::Any), // Dynamic fields
237            ],
238        );
239
240        // Register Option enum type.
241        registry.register_enum_scoped(
242            "Option",
243            vec![
244                EnumVariantInfo::new("Some", 0, 1), // Some(T) has 1 payload field
245                EnumVariantInfo::new("None", 1, 0), // None has no payload
246            ],
247        );
248
249        // Register Result enum type.
250        registry.register_enum_scoped(
251            "Result",
252            vec![
253                EnumVariantInfo::new("Ok", 0, 1),  // Ok(T) has 1 payload field
254                EnumVariantInfo::new("Err", 1, 1), // Err(E) has 1 payload field
255            ],
256        );
257
258        // Register builtin fixed-layout schemas (AnyError, TraceFrame, etc.).
259        super::builtin_schemas::register_builtin_schemas(&mut registry);
260
261        // Note: Domain-specific types (Candle, Trade, etc.) should be
262        // registered by the domain-specific stdlib, not here in core.
263
264        registry
265    }
266
267    /// Create a registry with stdlib types and return both registry and builtin IDs.
268    ///
269    /// Since B1.7 all registrations draw their IDs from the registry'''s
270    /// per-instance counter — no process-global or ambient counter is
271    /// consulted.
272    pub fn with_stdlib_types_and_builtin_ids() -> (Self, super::builtin_schemas::BuiltinSchemaIds) {
273        let mut registry = Self::new();
274
275        // Register Row type.
276        registry.register_type_scoped(
277            "Row",
278            vec![
279                ("timestamp".to_string(), FieldType::Timestamp),
280                ("fields".to_string(), FieldType::Any),
281            ],
282        );
283
284        // Register Option/Result enum types.
285        registry.register_enum_scoped(
286            "Option",
287            vec![
288                EnumVariantInfo::new("Some", 0, 1),
289                EnumVariantInfo::new("None", 1, 0),
290            ],
291        );
292        registry.register_enum_scoped(
293            "Result",
294            vec![
295                EnumVariantInfo::new("Ok", 0, 1),
296                EnumVariantInfo::new("Err", 1, 1),
297            ],
298        );
299
300        // Register builtin schemas and capture IDs.
301        let ids = super::builtin_schemas::register_builtin_schemas(&mut registry);
302
303        (registry, ids)
304    }
305
306    /// Register a type whose ID is drawn from this registry's per-instance
307    /// counter rather than the process-global `NEXT_SCHEMA_ID`.
308    ///
309    /// Preferred replacement for [`register_type`](Self::register_type) inside
310    /// `new_with_stdlib` and any future per-`Runtime` registration pathways.
311    pub fn register_type_scoped(
312        &mut self,
313        name: impl Into<String>,
314        fields: Vec<(String, FieldType)>,
315    ) -> SchemaId {
316        let id = self.allocate_id();
317        let schema = TypeSchema::with_id(id, name, fields);
318        self.register(schema);
319        id
320    }
321
322    /// Register an enum whose ID is drawn from this registry's per-instance
323    /// counter. See [`register_type_scoped`](Self::register_type_scoped).
324    pub fn register_enum_scoped(
325        &mut self,
326        name: impl Into<String>,
327        variants: Vec<EnumVariantInfo>,
328    ) -> SchemaId {
329        let id = self.allocate_id();
330        let schema = TypeSchema::new_enum_with_id(id, name, variants);
331        self.register(schema);
332        id
333    }
334
335    /// Create a registry seeded with the canonical stdlib schemas
336    /// (Row / Option / Result / builtin fixed-layout), using the registry's
337    /// own per-instance ID counter rather than the legacy global static.
338    ///
339    /// This is the entry point for per-`Runtime` schema isolation. Two
340    /// registries constructed with `new_with_stdlib` assign IDs from
341    /// independent domains and do not observe each other's state.
342    ///
343    /// Note: some schema constructors (e.g. when builtin_schemas uses
344    /// `TypeSchema::new`) still fall through to the global counter during the
345    /// B1 migration window; only the registry-level `register_type_scoped`
346    /// path is fully isolated. See the parity tests in this module for the
347    /// invariants that hold today.
348    pub fn new_with_stdlib() -> Self {
349        let mut registry = Self::new();
350
351        // Register Row type via the per-registry counter.
352        registry.register_type_scoped(
353            "Row",
354            vec![
355                ("timestamp".to_string(), FieldType::Timestamp),
356                ("fields".to_string(), FieldType::Any),
357            ],
358        );
359
360        // Register Option / Result enums via the per-registry counter.
361        registry.register_enum_scoped(
362            "Option",
363            vec![
364                EnumVariantInfo::new("Some", 0, 1),
365                EnumVariantInfo::new("None", 1, 0),
366            ],
367        );
368        registry.register_enum_scoped(
369            "Result",
370            vec![
371                EnumVariantInfo::new("Ok", 0, 1),
372                EnumVariantInfo::new("Err", 1, 1),
373            ],
374        );
375
376        // Register builtin fixed-layout schemas.
377        //
378        // NOTE: during the B1 migration window, `register_builtin_schemas`
379        // internally uses `TypeSchema::new`, which still bumps the global
380        // counter. The resulting IDs land in this registry's `by_id` / `by_name`
381        // maps, but they are drawn from the global domain. Registries
382        // constructed via `new_with_stdlib` therefore isolate *future*
383        // scoped allocations; they do not retrofit the builtin IDs. This is
384        // acceptable because builtin IDs are stable within a process — the
385        // failing-test leakage comes from user-registered types, which go
386        // through `register_type_scoped`.
387        super::builtin_schemas::register_builtin_schemas(&mut registry);
388
389        registry
390    }
391
392    /// Compute content hashes for all registered schemas.
393    pub fn compute_all_hashes(&mut self) {
394        for schema in self.by_name.values_mut() {
395            schema.content_hash();
396        }
397    }
398
399    /// Look up a schema by its content hash.
400    ///
401    /// Returns the first schema whose cached or computed content hash matches.
402    /// For best performance, call `compute_all_hashes` first.
403    pub fn get_by_content_hash(&self, hash: &[u8; 32]) -> Option<&TypeSchema> {
404        self.by_name.values().find(|schema| {
405            // Use cached hash if available, otherwise compute on the fly
406            let schema_hash = match schema.content_hash {
407                Some(h) => h,
408                None => schema.compute_content_hash(),
409            };
410            &schema_hash == hash
411        })
412    }
413
414    /// Merge another registry into this one
415    ///
416    /// Schemas from `other` are added to this registry. If a schema with the
417    /// same name already exists, it is NOT overwritten (first registration
418    /// wins). If the incoming schema'''s numeric ID already maps to a
419    /// different name in `self.by_id`, it is skipped — this preserves the
420    /// first `by_id` binding so callers that resolve names through the
421    /// ID domain of the pre-existing registry still find what they
422    /// registered. (Pre-B1.7 this never happened because all registries
423    /// drew IDs from a single process-global counter; B1.7 retired that
424    /// counter in favour of per-instance ones, so fresh registries can
425    /// produce overlapping ID ranges when merged.)
426    pub fn merge(&mut self, other: TypeSchemaRegistry) {
427        for (name, schema) in other.by_name {
428            if self.by_name.contains_key(&name) {
429                continue;
430            }
431            let id = schema.id;
432            if self.by_id.contains_key(&id) {
433                // ID collision with an existing schema under a different
434                // name — skip silently. The `resolve_builtin_schema_ids`
435                // path looks up builtins by name, so losing the ID mapping
436                // for builtins whose IDs collide with user schemas is
437                // acceptable; user lookups win.
438                continue;
439            }
440            self.by_id.insert(id, name.clone());
441            self.by_name.insert(name, schema);
442        }
443        // Also merge predeclared schemas, first-registration-wins on ID collision.
444        if let (Ok(other_by_id), Ok(mut self_by_id)) = (
445            other.predeclared_by_id.read(),
446            self.predeclared_by_id.write(),
447        ) {
448            for (id, schema) in other_by_id.iter() {
449                self_by_id.entry(*id).or_insert_with(|| schema.clone());
450            }
451        }
452        if let (Ok(other_cache), Ok(mut self_cache)) = (
453            other.predeclared_cache.read(),
454            self.predeclared_cache.write(),
455        ) {
456            for (key, id) in other_cache.iter() {
457                self_cache.entry(key.clone()).or_insert(*id);
458            }
459        }
460    }
461
462    // -- Predeclared schema support (moved off process-global statics in B1.6) ---
463
464    /// Build the canonical field-signature key used by the predeclared
465    /// schema cache.
466    fn predeclared_cache_key(fields: &[&str]) -> String {
467        fields.join("\u{1f}")
468    }
469
470    /// Register (or retrieve) a predeclared schema with `FieldType::Any`
471    /// columns for the given ordered field set.
472    ///
473    /// Intended for compile-time schema derivation paths (extensions,
474    /// comptime, printing helpers) that need runtime object construction
475    /// without a user-declared type. Repeated calls with identical field
476    /// names return the same cached ID.
477    pub fn register_predeclared_any_schema(&self, fields: &[String]) -> SchemaId {
478        let field_refs: Vec<&str> = fields.iter().map(|s| s.as_str()).collect();
479        let key = Self::predeclared_cache_key(&field_refs);
480
481        if let Ok(cache) = self.predeclared_cache.read() {
482            if let Some(id) = cache.get(&key) {
483                return *id;
484            }
485        }
486
487        let typed_fields: Vec<(String, FieldType)> = fields
488            .iter()
489            .map(|name| (name.clone(), FieldType::Any))
490            .collect();
491
492        let id = self.allocate_id();
493        let schema = TypeSchema::with_id(
494            id,
495            format!("__predecl_{}", fields.join("_")),
496            typed_fields,
497        );
498
499        if let Ok(mut reg) = self.predeclared_by_id.write() {
500            reg.insert(id, schema);
501        }
502        if let Ok(mut cache) = self.predeclared_cache.write() {
503            cache.insert(key, id);
504        }
505        id
506    }
507
508    /// Look up a predeclared schema by ID.
509    pub fn lookup_predeclared_by_id(&self, id: SchemaId) -> Option<TypeSchema> {
510        self.predeclared_by_id
511            .read()
512            .ok()
513            .and_then(|reg| reg.get(&id).cloned())
514    }
515
516    /// Mirror a predeclared schema with a caller-supplied ID.
517    ///
518    /// Used during the B1 migration window by
519    /// [`super::register_predeclared_any_schema`] so a single SchemaId
520    /// owned by the process-wide fallback registry is also visible
521    /// through the per-Runtime ambient registry. Idempotent: a second
522    /// call with the same ID is a no-op.
523    pub fn mirror_predeclared_any_schema(&self, fields: &[String], id: SchemaId) {
524        let field_refs: Vec<&str> = fields.iter().map(|s| s.as_str()).collect();
525        let key = Self::predeclared_cache_key(&field_refs);
526
527        if let Ok(cache) = self.predeclared_cache.read() {
528            if cache.get(&key).copied() == Some(id) {
529                return;
530            }
531        }
532
533        let typed_fields: Vec<(String, FieldType)> = fields
534            .iter()
535            .map(|name| (name.clone(), FieldType::Any))
536            .collect();
537
538        let schema = TypeSchema::with_id(
539            id,
540            format!("__predecl_{}", fields.join("_")),
541            typed_fields,
542        );
543
544        if let Ok(mut reg) = self.predeclared_by_id.write() {
545            reg.entry(id).or_insert(schema);
546        }
547        if let Ok(mut cache) = self.predeclared_cache.write() {
548            cache.entry(key).or_insert(id);
549        }
550    }
551
552    /// Look up a predeclared schema ID by an ordered field signature (fast
553    /// path).
554    pub fn lookup_predeclared_id_by_field_order(&self, fields: &[&str]) -> Option<SchemaId> {
555        let key = Self::predeclared_cache_key(fields);
556        self.predeclared_cache
557            .read()
558            .ok()
559            .and_then(|cache| cache.get(&key).copied())
560    }
561
562    /// Order-insensitive predeclared schema lookup by field set.
563    pub fn lookup_predeclared_by_field_set(&self, fields: &[&str]) -> Option<TypeSchema> {
564        let Ok(reg) = self.predeclared_by_id.read() else {
565            return None;
566        };
567        reg.values()
568            .find(|schema| {
569                if schema.fields.len() != fields.len() {
570                    return false;
571                }
572                let wanted: std::collections::HashSet<&str> = fields.iter().copied().collect();
573                schema
574                    .fields
575                    .iter()
576                    .all(|f| wanted.contains(f.name.as_str()))
577            })
578            .cloned()
579    }
580}
581
582// `shape_value::external_value::SchemaLookup` was deleted alongside the
583// rest of the external-value adapter layer (Phase 2b — see
584// `docs/defections.md` 2026-05-06). The trait's role was to let
585// `shape_value` look up schema metadata without depending on
586// `shape_runtime`; with `external_value` removed, callers route
587// through the runtime's `current_registry()` directly. This `impl`
588// block becomes a no-op and is omitted entirely.
589
590/// Builder for creating type schemas fluently
591pub struct TypeSchemaBuilder {
592    name: String,
593    fields: Vec<(String, FieldType)>,
594    field_meta: Vec<Vec<FieldAnnotation>>,
595}
596
597impl TypeSchemaBuilder {
598    /// Start building a new type schema
599    pub fn new(name: impl Into<String>) -> Self {
600        Self {
601            name: name.into(),
602            fields: Vec::new(),
603            field_meta: Vec::new(),
604        }
605    }
606
607    /// Add a f64 field
608    pub fn f64_field(mut self, name: impl Into<String>) -> Self {
609        self.fields.push((name.into(), FieldType::F64));
610        self.field_meta.push(vec![]);
611        self
612    }
613
614    /// Add an i64 field
615    pub fn i64_field(mut self, name: impl Into<String>) -> Self {
616        self.fields.push((name.into(), FieldType::I64));
617        self.field_meta.push(vec![]);
618        self
619    }
620
621    /// Add a decimal field (stored as f64, reconstructed as Decimal on read)
622    pub fn decimal_field(mut self, name: impl Into<String>) -> Self {
623        self.fields.push((name.into(), FieldType::Decimal));
624        self.field_meta.push(vec![]);
625        self
626    }
627
628    /// Add a boolean field
629    pub fn bool_field(mut self, name: impl Into<String>) -> Self {
630        self.fields.push((name.into(), FieldType::Bool));
631        self.field_meta.push(vec![]);
632        self
633    }
634
635    /// Add a string field
636    pub fn string_field(mut self, name: impl Into<String>) -> Self {
637        self.fields.push((name.into(), FieldType::String));
638        self.field_meta.push(vec![]);
639        self
640    }
641
642    /// Add a timestamp field
643    pub fn timestamp_field(mut self, name: impl Into<String>) -> Self {
644        self.fields.push((name.into(), FieldType::Timestamp));
645        self.field_meta.push(vec![]);
646        self
647    }
648
649    /// Add a nested object field
650    pub fn object_field(mut self, name: impl Into<String>, type_name: impl Into<String>) -> Self {
651        self.fields
652            .push((name.into(), FieldType::Object(type_name.into())));
653        self.field_meta.push(vec![]);
654        self
655    }
656
657    /// Add an array field
658    pub fn array_field(mut self, name: impl Into<String>, element_type: FieldType) -> Self {
659        self.fields
660            .push((name.into(), FieldType::Array(Box::new(element_type))));
661        self.field_meta.push(vec![]);
662        self
663    }
664
665    /// Add a HashMap<K, V> field. W17.3-4.1 — per audit §4.B builder
666    /// parity with `array_field`. Slot storage points to
667    /// `HeapKind::HashMap`; the schema-side variant carries the static
668    /// K/V FieldTypes for compile-time checking (ADR-006 §2.7.5).
669    pub fn hashmap_field(
670        mut self,
671        name: impl Into<String>,
672        key_type: FieldType,
673        value_type: FieldType,
674    ) -> Self {
675        self.fields.push((
676            name.into(),
677            FieldType::HashMap {
678                key: Box::new(key_type),
679                value: Box::new(value_type),
680            },
681        ));
682        self.field_meta.push(vec![]);
683        self
684    }
685
686    /// Add a Set<T> field. W17.3-4.1 — per audit §4.B builder parity
687    /// with `array_field`. Slot storage points to `HeapKind::HashSet`;
688    /// the schema-side variant carries the static element FieldType
689    /// for compile-time checking (ADR-006 §2.7.5).
690    pub fn set_field(mut self, name: impl Into<String>, element_type: FieldType) -> Self {
691        self.fields
692            .push((name.into(), FieldType::Set(Box::new(element_type))));
693        self.field_meta.push(vec![]);
694        self
695    }
696
697    /// Add a dynamic/any field
698    pub fn any_field(mut self, name: impl Into<String>) -> Self {
699        self.fields.push((name.into(), FieldType::Any));
700        self.field_meta.push(vec![]);
701        self
702    }
703
704    /// Add a field with annotation metadata
705    pub fn field_with_meta(
706        mut self,
707        name: impl Into<String>,
708        field_type: FieldType,
709        annotations: Vec<FieldAnnotation>,
710    ) -> Self {
711        self.fields.push((name.into(), field_type));
712        self.field_meta.push(annotations);
713        self
714    }
715
716    /// Build the type schema
717    pub fn build(self) -> TypeSchema {
718        let mut schema = TypeSchema::new(self.name, self.fields);
719        // Apply annotations to fields
720        for (i, annotations) in self.field_meta.into_iter().enumerate() {
721            if i < schema.fields.len() {
722                schema.fields[i].annotations = annotations;
723            }
724        }
725        schema
726    }
727
728    /// Build and register in a registry, using the registry's per-instance
729    /// schema-ID counter.
730    ///
731    /// Since B1.7 this path must not consult `current_registry`, because
732    /// `DEFAULT_SCHEMA_REGISTRY` is itself initialized via this builder
733    /// and that would cause a recursive `LazyLock` init. Allocating
734    /// directly from the target registry keeps bootstrap deterministic
735    /// and per-registry isolated.
736    pub fn register(self, registry: &mut TypeSchemaRegistry) -> SchemaId {
737        let id = registry.allocate_id();
738        let mut schema = TypeSchema::with_id(id, self.name, self.fields);
739        for (i, annotations) in self.field_meta.into_iter().enumerate() {
740            if i < schema.fields.len() {
741                schema.fields[i].annotations = annotations;
742            }
743        }
744        registry.register(schema);
745        id
746    }
747}
748
749#[cfg(test)]
750mod tests {
751    use super::*;
752
753    #[test]
754    fn test_registry() {
755        let mut registry = TypeSchemaRegistry::new();
756
757        let schema_id = registry.register_type(
758            "MyType",
759            vec![
760                ("x".to_string(), FieldType::F64),
761                ("y".to_string(), FieldType::F64),
762            ],
763        );
764
765        assert!(registry.has_type("MyType"));
766        assert!(!registry.has_type("OtherType"));
767
768        let schema = registry.get("MyType").unwrap();
769        assert_eq!(schema.id, schema_id);
770        assert_eq!(schema.field_count(), 2);
771
772        // Test lookup by ID
773        let schema_by_id = registry.get_by_id(schema_id).unwrap();
774        assert_eq!(schema_by_id.name, "MyType");
775    }
776
777    #[test]
778    fn test_builder() {
779        let mut registry = TypeSchemaRegistry::new();
780
781        let schema_id = TypeSchemaBuilder::new("Point")
782            .f64_field("x")
783            .f64_field("y")
784            .f64_field("z")
785            .register(&mut registry);
786
787        let schema = registry.get_by_id(schema_id).unwrap();
788        assert_eq!(schema.name, "Point");
789        assert_eq!(schema.field_count(), 3);
790        assert_eq!(schema.field_offset("x"), Some(0));
791        assert_eq!(schema.field_offset("y"), Some(8));
792        assert_eq!(schema.field_offset("z"), Some(16));
793    }
794
795    #[test]
796    fn test_stdlib_types() {
797        let registry = TypeSchemaRegistry::with_stdlib_types();
798
799        assert!(registry.has_type("Row"));
800        let row_schema = registry.get("Row").unwrap();
801        assert!(row_schema.has_field("timestamp"));
802    }
803
/// Registers an OHLCV-like `Candle` type (the kind of schema a finance
/// stdlib would define) and checks its size and field layout.
#[test]
fn test_ohlcv_schema() {
    let mut reg = TypeSchemaRegistry::new();

    TypeSchemaBuilder::new("Candle")
        .timestamp_field("timestamp")
        .f64_field("open")
        .f64_field("high")
        .f64_field("low")
        .f64_field("close")
        .f64_field("volume")
        .register(&mut reg);

    let candle = reg.get("Candle").unwrap();
    assert_eq!(candle.field_count(), 6);
    // Six fields at 8 bytes each.
    assert_eq!(candle.data_size, 48);

    // Offsets must be sequential in declaration order.
    let expected_layout = [
        ("timestamp", 0),
        ("open", 8),
        ("high", 16),
        ("low", 24),
        ("close", 32),
        ("volume", 40),
    ];
    for (field, expected_offset) in expected_layout {
        assert_eq!(candle.field_offset(field), Some(expected_offset));
    }
}
830
/// `Option` and `Result` must both be registered as enum schemas, with
/// their variants mapped to ids 0 and 1 respectively.
#[test]
fn test_stdlib_enum_types() {
    let stdlib = TypeSchemaRegistry::with_stdlib_types();

    // (type name, variant expected at id 0, variant expected at id 1)
    let expectations = [("Option", "Some", "None"), ("Result", "Ok", "Err")];

    for (type_name, variant_zero, variant_one) in expectations {
        assert!(stdlib.has_type(type_name));

        let enum_schema = stdlib.get(type_name).unwrap();
        assert!(enum_schema.is_enum());
        assert_eq!(enum_schema.variant_id(variant_zero), Some(0));
        assert_eq!(enum_schema.variant_id(variant_one), Some(1));
    }
}
849
/// After registering two types, `max_schema_id` must report the larger of
/// the two IDs that were handed out.
#[test]
fn test_max_schema_id() {
    let mut reg = TypeSchemaRegistry::new();

    let id_a = reg.register_type("A", vec![(String::from("x"), FieldType::F64)]);
    let id_b = reg.register_type("B", vec![(String::from("y"), FieldType::F64)]);

    let highest = id_a.max(id_b);
    assert_eq!(reg.max_schema_id(), Some(highest));
}
857
858    // ---- B1.1 parity tests --------------------------------------------------
859    //
860    // These tests exercise the new per-registry schema ID counter in isolation
861    // from the process-global `NEXT_SCHEMA_ID` static. They prove that two
862    // independent `TypeSchemaRegistry` instances built with `new_with_stdlib`
863    // allocate IDs from *their own* domains when using `register_type_scoped`
864    // / `register_enum_scoped` — the root-cause fix for the cross-test schema
865    // ID leakage that motivates Track B1.
866
/// `allocate_id` must advance only the counter of the registry it is called
/// on; a second registry keeps its own counter.
#[test]
fn b1_1_registry_allocate_id_is_per_instance() {
    let left = TypeSchemaRegistry::new();
    let right = TypeSchemaRegistry::new();

    // Fresh registries agree on their starting counter value.
    assert_eq!(left.peek_next_id(), right.peek_next_id());

    // Two allocations on `left` yield consecutive IDs ...
    let first = left.allocate_id();
    let second = left.allocate_id();
    assert_eq!(second, first + 1);
    // ... while `right` still points at the value `left` started from.
    assert_eq!(right.peek_next_id(), first);

    // Symmetric check: the first allocation on `right` repeats `left`'s
    // first ID.
    assert_eq!(right.allocate_id(), first);
}
885
/// Two registries built with `new_with_stdlib` must hand out scoped type
/// IDs from independent counters: registering on one never advances the
/// other.
#[test]
fn b1_1_new_with_stdlib_uses_registry_counter_for_scoped_types() {
    let mut r1 = TypeSchemaRegistry::new_with_stdlib();
    let mut r2 = TypeSchemaRegistry::new_with_stdlib();

    // The canonical stdlib types are present in both registries.
    for name in ["Row", "Option", "Result"] {
        assert!(r1.has_type(name), "r1 missing {name}");
        assert!(r2.has_type(name), "r2 missing {name}");
    }

    // Field list for a schema with one F64 field of the given name.
    let single_f64 = |field: &str| vec![(field.to_string(), FieldType::F64)];

    // Register the same user type on each registry, back to back.
    let r1_first = r1.register_type_scoped("UserA", single_f64("x"));
    let r2_first = r2.register_type_scoped("UserA", single_f64("x"));

    // Each registry resolves "UserA" to the ID it handed out itself.
    assert_eq!(r1.get("UserA").unwrap().id, r1_first);
    assert_eq!(r2.get("UserA").unwrap().id, r2_first);

    // Key invariant: a further registration on r1 takes r1's next ID ...
    let r1_second = r1.register_type_scoped("UserB", single_f64("y"));
    assert_eq!(r1_second, r1_first + 1);

    // ... and leaves r2's counter alone, so r2's next ID directly follows
    // r2's own previous one. The two registries may produce equal IDs;
    // those IDs only have meaning inside their own registry.
    let r2_second = r2.register_type_scoped("UserB", single_f64("y"));
    assert_eq!(r2_second, r2_first + 1);
}
921
/// Registering the same enum on two fresh registries must yield the same
/// scoped ID in each, and both registries must see the type as an enum.
#[test]
fn b1_1_scoped_enum_ids_are_per_registry() {
    let mut reg_a = TypeSchemaRegistry::new();
    let mut reg_b = TypeSchemaRegistry::new();

    // Builds a fresh copy of the `Color` variant list for each registration.
    let color_variants = || {
        vec![
            EnumVariantInfo::new("Red", 0, 0),
            EnumVariantInfo::new("Green", 1, 0),
        ]
    };

    let id_in_a = reg_a.register_enum_scoped("Color", color_variants());
    let id_in_b = reg_b.register_enum_scoped("Color", color_variants());

    // Independent registries may legitimately assign the same ID to an enum
    // defined under the same name.
    assert_eq!(id_in_a, id_in_b);

    for reg in [&reg_a, &reg_b] {
        assert!(reg.get("Color").unwrap().is_enum());
    }
}
948
/// `ensure_next_id_above` must raise only the counter of the registry it is
/// called on.
#[test]
fn b1_1_ensure_next_id_above_is_per_registry() {
    let bumped = TypeSchemaRegistry::new();
    let untouched = TypeSchemaRegistry::new();

    bumped.ensure_next_id_above(500);

    // The bumped registry now allocates from just above the requested floor.
    assert_eq!(bumped.peek_next_id(), 501);
    // The other registry still sits at the initial seed.
    assert_eq!(untouched.peek_next_id(), INITIAL_SCHEMA_ID);
}
960}