shape_runtime/type_schema/mod.rs
1//! Type Schema Registry for JIT Type Specialization
2//!
3//! This module provides compile-time type information for optimizing
4//! field access in the JIT compiler. When the type of an object is known,
5//! we can generate direct memory access instead of HashMap lookups.
6//!
7//! # Overview
8//!
9//! - `TypeSchema` - Describes the layout of a declared type
10//! - `FieldDef` - Defines a single field with name, type, and offset
11//! - `TypeSchemaRegistry` - Global registry of all known type schemas
12//!
13//! # Performance
14//!
15//! Direct field access: ~2ns vs HashMap lookup: ~25ns (12x faster)
16//!
17//! # Intersection Types
18//!
19//! Supports merging multiple schemas for intersection types (`A + B`).
20//! Field collisions are detected at compile time and result in errors.
21
22use shape_value::{HeapKind, KindedSlot, NativeKind, ValueSlot};
23use shape_value::heap_value::HeapValue;
24use std::collections::{HashMap, HashSet};
25use std::sync::Arc;
26
27// ADR-005: TypedFieldValue is the input carrier ABI for object construction.
28// Single-discriminator discipline (§Decision §1): all heap types route through
29// `Heap(Arc<HeapValue>)` and dispatch via `HeapValue::kind()`. The single
30// explicit exception is `String(Arc<String>)` (§Decision §2), justified by
31// measured allocation cost on the most common heap type — strings are an
32// order of magnitude more frequent than other heap types in stdlib parser
33// output, and routing them through `Arc::new(HeapValue::String(arc))` would
34// cost one extra `Arc::new` allocation per string field at construction.
35//
36// Per ADR-005 §Forbidden, do NOT add per-HeapKind variants here
37// (Array/Object/HashMap/Decimal/Timestamp/...). Adding any such variant
38// requires its own ADR-level justification with measurement.
39//
40// See docs/adr/005-typed-slot-construction.md.
/// Typed input carrier for a single field value during object
/// construction (ADR-005).
///
/// Scalars are carried inline, strings as `Arc<String>`, and every other
/// heap type through the single [`TypedFieldValue::Heap`] variant.
#[derive(Debug, Clone)]
pub enum TypedFieldValue {
    /// 64-bit floating-point scalar.
    F64(f64),
    /// Signed 64-bit integer scalar.
    I64(i64),
    /// Signed 8-bit integer scalar.
    I8(i8),
    /// Unsigned 8-bit integer scalar.
    U8(u8),
    /// Signed 16-bit integer scalar.
    I16(i16),
    /// Unsigned 16-bit integer scalar.
    U16(u16),
    /// Signed 32-bit integer scalar.
    I32(i32),
    /// Unsigned 32-bit integer scalar.
    U32(u32),
    /// Unsigned 64-bit integer scalar.
    U64(u64),
    /// Boolean scalar.
    Bool(bool),
    /// String exception, named and bounded in ADR-005 §Decision §2.
    /// `Arc<String>` is the runtime carrier (refcounted shared ownership);
    /// not `String` (owned), not `&str` (borrowed), not `StringId` (interned).
    /// Future interning layer (ADR-005 §5 Layer 3) coexists by deduplicating
    /// the Arc-inner.
    String(Arc<String>),
    /// Single discriminator for all other heap types. Dispatch via
    /// `HeapValue::kind()`. Per ADR-005 §1, no parallel sum types whose
    /// variants project 1:1 to HeapKind.
    Heap(Arc<HeapValue>),
}
64
65// Module declarations
66pub mod builtin_schemas;
67pub mod current;
68pub mod enum_support;
69pub mod field_types;
70pub mod intersection;
71pub mod physical_binding;
72pub mod registry;
73pub mod schema;
74
75// Re-export public types for backward compatibility
76pub use builtin_schemas::BuiltinSchemaIds;
77pub use current::{
78 SyncRegistryScope, current_registry, default_registry, try_current_registry, with_async_scope,
79};
80pub use enum_support::{EnumInfo, EnumVariantInfo, EnumVariantKind};
81pub use field_types::{FieldAnnotation, FieldDef, FieldType};
82pub use physical_binding::PhysicalSchemaBinding;
83pub use registry::{TypeSchemaBuilder, TypeSchemaRegistry};
84pub use schema::{TypeBinding, TypeBindingError, TypeSchema};
85
/// Error type for schema operations.
///
/// The `Display` text of each variant is generated by `thiserror` from the
/// `#[error(...)]` attribute attached to it.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum SchemaError {
    /// Field collision detected during intersection merge
    #[error("Field collision on '{field_name}': type '{type1}' vs '{type2}'")]
    FieldCollision {
        /// Name of the colliding field, as shown in the error message.
        field_name: String,
        /// First of the two conflicting types, rendered as text.
        type1: String,
        /// Second of the two conflicting types, rendered as text.
        type2: String,
    },
    /// Schema not found. The payload is interpolated verbatim into the
    /// message (presumably the schema's name or ID — confirm against callers).
    #[error("Schema not found: {0}")]
    NotFound(String),
}
100
/// Unique identifier for a type schema.
///
/// A plain alias for `u32`, so IDs are `Copy` and directly comparable.
pub type SchemaId = u32;
103
104/// Ensure all future schema IDs from the current ambient registry are
105/// strictly greater than `max_existing_id`.
106///
107/// Used when loading externally compiled/cached bytecode that may contain
108/// schema IDs from previous processes. Since B1.7 the reservation lands
109/// on [`current_registry`] instead of a process-global counter, so each
110/// runtime narrows the reservation to its own domain.
111pub fn ensure_next_schema_id_above(max_existing_id: SchemaId) {
112 current_registry().ensure_next_id_above(max_existing_id);
113}
114
115/// Register a predeclared schema with `FieldType::Any` for the given ordered fields.
116///
117/// This is intended for compile-time schema derivation paths (extensions/comptime)
118/// that need runtime object construction without runtime schema synthesis.
119///
120/// Since B1.7 the registration targets the ambient [`current_registry`]
121/// exclusively; scopeless callers land on the process-wide default
122/// registry exposed by that accessor. The previous `FALLBACK_PREDECLARED_REGISTRY`
123/// static has been retired.
124pub fn register_predeclared_any_schema(fields: &[String]) -> SchemaId {
125 current_registry().register_predeclared_any_schema(fields)
126}
127
128fn lookup_predeclared_schema_by_id(id: SchemaId) -> Option<TypeSchema> {
129 current_registry().lookup_predeclared_by_id(id)
130}
131
132fn lookup_predeclared_schema_id(fields: &[&str]) -> Option<SchemaId> {
133 let reg = current_registry();
134
135 // Order-sensitive fast path over the current registry's predeclared cache.
136 if let Some(id) = reg.lookup_predeclared_id_by_field_order(fields) {
137 return Some(id);
138 }
139
140 // Ordered match against user-registered / stdlib schemas in the ambient
141 // registry.
142 reg.type_names()
143 .filter_map(|name| reg.get(name))
144 .find(|schema| {
145 if schema.fields.len() != fields.len() {
146 return false;
147 }
148 schema
149 .fields
150 .iter()
151 .map(|f| f.name.as_str())
152 .eq(fields.iter().copied())
153 })
154 .map(|schema| schema.id)
155}
156
157fn lookup_schema_by_id(id: SchemaId) -> Option<TypeSchema> {
158 let reg = current_registry();
159 if let Some(schema) = reg.get_by_id(id).cloned() {
160 return Some(schema);
161 }
162 reg.lookup_predeclared_by_id(id)
163}
164
/// Public wrapper for looking up a schema by ID.
///
/// Delegates to the private `lookup_schema_by_id`, which checks the
/// ambient registry's registered schemas first and then its predeclared
/// schemas. Used by wire_conversion when the Context registry doesn't have
/// the schema (e.g. ad-hoc/const-eval objects).
///
/// Returns `None` when neither lookup finds the ID.
pub fn lookup_schema_by_id_public(id: SchemaId) -> Option<TypeSchema> {
    lookup_schema_by_id(id)
}
171
172fn schema_matches_field_set(schema: &TypeSchema, fields: &[&str]) -> bool {
173 if schema.fields.len() != fields.len() {
174 return false;
175 }
176 let wanted: HashSet<&str> = fields.iter().copied().collect();
177 schema
178 .fields
179 .iter()
180 .all(|field| wanted.contains(field.name.as_str()))
181}
182
183/// Resolve a schema for a field list.
184///
185/// Resolution is order-sensitive first (fast path), then order-insensitive
186/// fallback for wire/object map roundtrips where key ordering is unstable.
187/// If no existing schema matches, auto-registers an anonymous `FieldType::Any`
188/// schema so that ad-hoc objects (const eval, tests, FFI) work without
189/// explicit pre-registration.
190fn lookup_schema_for_fields(fields: &[&str]) -> Option<TypeSchema> {
191 if let Some(id) = lookup_predeclared_schema_id(fields) {
192 return lookup_schema_by_id(id);
193 }
194
195 let reg = current_registry();
196 // Order-insensitive match over the current registry's named schemas.
197 if let Some(schema) = reg
198 .type_names()
199 .filter_map(|name| reg.get(name))
200 .find(|schema| schema_matches_field_set(schema, fields))
201 {
202 return Some(schema.clone());
203 }
204 if let Some(schema) = reg.lookup_predeclared_by_field_set(fields) {
205 return Some(schema);
206 }
207
208 // Auto-register an anonymous schema for ad-hoc field sets.
209 let owned: Vec<String> = fields.iter().map(|s| s.to_string()).collect();
210 let id = register_predeclared_any_schema(&owned);
211 lookup_predeclared_schema_by_id(id)
212}
213
214/// Create a `KindedSlot` carrying a `HeapValue::TypedObject` from a list
215/// of `(name, KindedSlot)` field pairs.
216///
217/// Per ADR-006 §2.7.4 audit-accuracy ruling + §2.7.3 N9 cleanup
218/// pre-flag, the previous `nb_to_slot` body relied on tag-bit dispatch
219/// via `value.is_heap()` / `value.raw_bits()` / `value.as_heap_ref()`
220/// / `value.as_any_array().to_generic()` (the forbidden N9
221/// tag-decoding pattern). The kind-threaded rebuild reads each pair's
222/// `NativeKind` from the `KindedSlot::kind` field (single source of
223/// truth) and dispatches per-kind to the matching per-FieldType
224/// `ValueSlot::from_*` constructor — no heap materialization, no
225/// `is_heap()` consultation. The slot's strong-count share is moved
226/// into the typed-object's slot list (the caller's `KindedSlot::clone`
227/// bumped it on construction).
228pub fn typed_object_from_pairs(fields: &[(&str, KindedSlot)]) -> KindedSlot {
229 let field_names: Vec<&str> = fields.iter().map(|(name, _)| *name).collect();
230 let schema = lookup_schema_for_fields(&field_names).unwrap_or_else(|| {
231 panic!(
232 "Missing predeclared schema for fields [{}]. Runtime schema synthesis is disabled.",
233 field_names.join(", ")
234 )
235 });
236 let value_by_name: HashMap<&str, &KindedSlot> =
237 fields.iter().map(|(name, value)| (*name, value)).collect();
238
239 // Build slots — `NativeKind` selects the per-FieldType constructor.
240 // Heap arms set the heap_mask bit; inline-scalar arms do not.
241 let mut slots = Vec::with_capacity(schema.fields.len());
242 let mut field_kinds: Vec<NativeKind> = Vec::with_capacity(schema.fields.len());
243 let mut heap_mask: u64 = 0;
244 for (i, field_def) in schema.fields.iter().enumerate() {
245 let value = value_by_name
246 .get(field_def.name.as_str())
247 .unwrap_or_else(|| {
248 panic!(
249 "Missing field '{}' while materializing typed object",
250 field_def.name
251 )
252 });
253 // `KindedSlot::clone` bumps the heap refcount; the resulting
254 // `ValueSlot` owns one strong-count share independent of the
255 // input pair's share. The bits transfer is a memcpy of the raw
256 // u64; the explicit `clone()` does the per-kind retain.
257 let cloned = (*value).clone();
258 let bits = cloned.slot().raw();
259 let kind = cloned.kind();
260 let is_heap = match kind {
261 NativeKind::String | NativeKind::Ptr(_) => true,
262 _ => false,
263 };
264 // Forget the cloned `KindedSlot` so its `Drop` does not
265 // decrement the share we just transferred into the slot list.
266 std::mem::forget(cloned);
267 let slot = ValueSlot::from_raw(bits);
268 slots.push(slot);
269 field_kinds.push(kind);
270 if is_heap {
271 heap_mask |= 1u64 << i;
272 }
273 }
274
275 // Wave 2 Round 4 D4 ckpt-1: migrated from
276 // `Arc::new(TypedObjectStorage::new(...))` to v2-raw `_new`
277 // returning `*mut TypedObjectStorage`; slot constructed via
278 // `from_typed_object_raw` per D1 API surface. The legacy
279 // `Arc<TypedObjectStorage>` carrier through `HeapValue::TypedObject`
280 // remains the variant signature until ckpt-final atomic flip; the
281 // intermediate type-witness check is dropped because `_new` returns
282 // a raw pointer rather than an `Arc` (cargo check expected broken
283 // here until the variant signature flips in ckpt-final lockstep).
284 let ptr = shape_value::TypedObjectStorage::_new(
285 schema.id as u64,
286 slots.into_boxed_slice(),
287 heap_mask,
288 Arc::from(field_kinds.into_boxed_slice()),
289 );
290 KindedSlot::new(
291 ValueSlot::from_typed_object_raw(ptr),
292 NativeKind::Ptr(HeapKind::TypedObject),
293 )
294}
295
#[cfg(test)]
mod tests {
    // Intentionally empty placeholder.
    //
    // Pre-bulldozer tests of `typed_object_from_pairs` /
    // `typed_object_to_hashmap_nb` decoded slots via `ValueWord`'s
    // `.as_str()` / `.as_i64()` methods. Phase 1.B retires those
    // accessors with the rest of `ValueWord`. Behavioural coverage of
    // typed-object construction returns when shape-vm Cluster #4 lands
    // its kind-threaded slot tests.
}
305
/// Convert a TypedObject `KindedSlot` back to a `HashMap<String, KindedSlot>`.
///
/// Intended as the inverse of [`typed_object_from_pairs`].
///
/// **Currently a stub:** the argument is ignored and the function always
/// returns `None`.
///
/// Phase 1.B (ADR-006 §2.7.4 audit-accuracy ruling): rebuilding the map
/// requires deriving each slot's `NativeKind` from the schema's
/// `FieldType`. That schema → `NativeKind` lowering is a shared utility
/// planned for Phase 2c; until it lands, callers must treat `None` as
/// "conversion unavailable" rather than "not a TypedObject".
pub fn typed_object_to_hashmap_nb(
    _value: &KindedSlot,
) -> Option<HashMap<String, KindedSlot>> {
    // Phase 1.B: schema → NativeKind lowering is the deferred Phase 2c
    // utility. This helper's pre-bulldozer body decoded slots via
    // `slots[i].as_heap_nb()` / `ValueWord::clone_from_bits` (now
    // deleted). Returning `None` keeps callers honest until the kind-
    // threaded rebuild lands; the only current consumer is the deleted
    // unit test above.
    None
}
327