Skip to main content

shape_runtime/type_schema/
mod.rs

1//! Type Schema Registry for JIT Type Specialization
2//!
3//! This module provides compile-time type information for optimizing
4//! field access in the JIT compiler. When the type of an object is known,
5//! we can generate direct memory access instead of HashMap lookups.
6//!
7//! # Overview
8//!
9//! - `TypeSchema` - Describes the layout of a declared type
10//! - `FieldDef` - Defines a single field with name, type, and offset
11//! - `TypeSchemaRegistry` - Global registry of all known type schemas
12//!
13//! # Performance
14//!
15//! Direct field access: ~2ns vs HashMap lookup: ~25ns (12x faster)
16//!
17//! # Intersection Types
18//!
19//! Supports merging multiple schemas for intersection types (`A + B`).
20//! Field collisions are detected at compile time and result in errors.
21
22use shape_value::{HeapKind, KindedSlot, NativeKind, ValueSlot};
23use shape_value::heap_value::HeapValue;
24use std::collections::{HashMap, HashSet};
25use std::sync::Arc;
26
27// ADR-005: TypedFieldValue is the input carrier ABI for object construction.
28// Single-discriminator discipline (§Decision §1): all heap types route through
29// `Heap(Arc<HeapValue>)` and dispatch via `HeapValue::kind()`. The single
30// explicit exception is `String(Arc<String>)` (§Decision §2), justified by
31// measured allocation cost on the most common heap type — strings are an
32// order of magnitude more frequent than other heap types in stdlib parser
33// output, and routing them through `Arc::new(HeapValue::String(arc))` would
34// cost one extra `Arc::new` allocation per string field at construction.
35//
36// Per ADR-005 §Forbidden, do NOT add per-HeapKind variants here
37// (Array/Object/HashMap/Decimal/Timestamp/...). Adding any such variant
38// requires its own ADR-level justification with measurement.
39//
40// See docs/adr/005-typed-slot-construction.md.
/// Typed value for a single field, used as the input carrier for object
/// construction (ADR-005).
///
/// Scalar variants carry the value inline. Heap data travels either through
/// the dedicated [`TypedFieldValue::String`] variant or through the generic
/// [`TypedFieldValue::Heap`] variant.
#[derive(Debug, Clone)]
pub enum TypedFieldValue {
    /// 64-bit floating-point value.
    F64(f64),
    /// Signed 64-bit integer value.
    I64(i64),
    /// Signed 8-bit integer value.
    I8(i8),
    /// Unsigned 8-bit integer value.
    U8(u8),
    /// Signed 16-bit integer value.
    I16(i16),
    /// Unsigned 16-bit integer value.
    U16(u16),
    /// Signed 32-bit integer value.
    I32(i32),
    /// Unsigned 32-bit integer value.
    U32(u32),
    /// Unsigned 64-bit integer value.
    U64(u64),
    /// Boolean value.
    Bool(bool),
    /// String exception, named and bounded in ADR-005 §Decision §2.
    /// `Arc<String>` is the runtime carrier (refcounted shared ownership);
    /// not `String` (owned), not `&str` (borrowed), not `StringId` (interned).
    /// Future interning layer (ADR-005 §5 Layer 3) coexists by deduplicating
    /// the Arc-inner.
    String(Arc<String>),
    /// Single discriminator for all other heap types. Dispatch via
    /// `HeapValue::kind()`. Per ADR-005 §1, no parallel sum types whose
    /// variants project 1:1 to HeapKind.
    Heap(Arc<HeapValue>),
}
64
65// Module declarations
66pub mod builtin_schemas;
67pub mod current;
68pub mod enum_support;
69pub mod field_types;
70pub mod intersection;
71pub mod physical_binding;
72pub mod registry;
73pub mod schema;
74
75// Re-export public types for backward compatibility
76pub use builtin_schemas::BuiltinSchemaIds;
77pub use current::{
78    SyncRegistryScope, current_registry, default_registry, try_current_registry, with_async_scope,
79};
80pub use enum_support::{EnumInfo, EnumVariantInfo, EnumVariantKind};
81pub use field_types::{FieldAnnotation, FieldDef, FieldType};
82pub use physical_binding::PhysicalSchemaBinding;
83pub use registry::{TypeSchemaBuilder, TypeSchemaRegistry};
84pub use schema::{TypeBinding, TypeBindingError, TypeSchema};
85
/// Error type for schema operations.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute (derived via `thiserror`).
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum SchemaError {
    /// Field collision detected during intersection merge
    #[error("Field collision on '{field_name}': type '{type1}' vs '{type2}'")]
    FieldCollision {
        /// Name of the field on which the collision occurred.
        field_name: String,
        /// First of the two conflicting type descriptions.
        type1: String,
        /// Second of the two conflicting type descriptions.
        type2: String,
    },
    /// Schema not found. The payload is echoed in the error message;
    /// presumably it identifies the missing schema — confirm at call sites.
    #[error("Schema not found: {0}")]
    NotFound(String),
}
100
/// Unique identifier for a type schema.
///
/// Use [`ensure_next_schema_id_above`] to keep newly issued IDs above a
/// known maximum (for example when loading cached bytecode).
pub type SchemaId = u32;
103
104/// Ensure all future schema IDs from the current ambient registry are
105/// strictly greater than `max_existing_id`.
106///
107/// Used when loading externally compiled/cached bytecode that may contain
108/// schema IDs from previous processes. Since B1.7 the reservation lands
109/// on [`current_registry`] instead of a process-global counter, so each
110/// runtime narrows the reservation to its own domain.
111pub fn ensure_next_schema_id_above(max_existing_id: SchemaId) {
112    current_registry().ensure_next_id_above(max_existing_id);
113}
114
115/// Register a predeclared schema with `FieldType::Any` for the given ordered fields.
116///
117/// This is intended for compile-time schema derivation paths (extensions/comptime)
118/// that need runtime object construction without runtime schema synthesis.
119///
120/// Since B1.7 the registration targets the ambient [`current_registry`]
121/// exclusively; scopeless callers land on the process-wide default
122/// registry exposed by that accessor. The previous `FALLBACK_PREDECLARED_REGISTRY`
123/// static has been retired.
124pub fn register_predeclared_any_schema(fields: &[String]) -> SchemaId {
125    current_registry().register_predeclared_any_schema(fields)
126}
127
128fn lookup_predeclared_schema_by_id(id: SchemaId) -> Option<TypeSchema> {
129    current_registry().lookup_predeclared_by_id(id)
130}
131
132fn lookup_predeclared_schema_id(fields: &[&str]) -> Option<SchemaId> {
133    let reg = current_registry();
134
135    // Order-sensitive fast path over the current registry's predeclared cache.
136    if let Some(id) = reg.lookup_predeclared_id_by_field_order(fields) {
137        return Some(id);
138    }
139
140    // Ordered match against user-registered / stdlib schemas in the ambient
141    // registry.
142    reg.type_names()
143        .filter_map(|name| reg.get(name))
144        .find(|schema| {
145            if schema.fields.len() != fields.len() {
146                return false;
147            }
148            schema
149                .fields
150                .iter()
151                .map(|f| f.name.as_str())
152                .eq(fields.iter().copied())
153        })
154        .map(|schema| schema.id)
155}
156
157fn lookup_schema_by_id(id: SchemaId) -> Option<TypeSchema> {
158    let reg = current_registry();
159    if let Some(schema) = reg.get_by_id(id).cloned() {
160        return Some(schema);
161    }
162    reg.lookup_predeclared_by_id(id)
163}
164
165/// Public wrapper for looking up a schema by ID across all registries
166/// (stdlib + predeclared). Used by wire_conversion when Context registry
167/// doesn't have the schema (e.g. ad-hoc/const-eval objects).
168pub fn lookup_schema_by_id_public(id: SchemaId) -> Option<TypeSchema> {
169    lookup_schema_by_id(id)
170}
171
172fn schema_matches_field_set(schema: &TypeSchema, fields: &[&str]) -> bool {
173    if schema.fields.len() != fields.len() {
174        return false;
175    }
176    let wanted: HashSet<&str> = fields.iter().copied().collect();
177    schema
178        .fields
179        .iter()
180        .all(|field| wanted.contains(field.name.as_str()))
181}
182
183/// Resolve a schema for a field list.
184///
185/// Resolution is order-sensitive first (fast path), then order-insensitive
186/// fallback for wire/object map roundtrips where key ordering is unstable.
187/// If no existing schema matches, auto-registers an anonymous `FieldType::Any`
188/// schema so that ad-hoc objects (const eval, tests, FFI) work without
189/// explicit pre-registration.
190fn lookup_schema_for_fields(fields: &[&str]) -> Option<TypeSchema> {
191    if let Some(id) = lookup_predeclared_schema_id(fields) {
192        return lookup_schema_by_id(id);
193    }
194
195    let reg = current_registry();
196    // Order-insensitive match over the current registry's named schemas.
197    if let Some(schema) = reg
198        .type_names()
199        .filter_map(|name| reg.get(name))
200        .find(|schema| schema_matches_field_set(schema, fields))
201    {
202        return Some(schema.clone());
203    }
204    if let Some(schema) = reg.lookup_predeclared_by_field_set(fields) {
205        return Some(schema);
206    }
207
208    // Auto-register an anonymous schema for ad-hoc field sets.
209    let owned: Vec<String> = fields.iter().map(|s| s.to_string()).collect();
210    let id = register_predeclared_any_schema(&owned);
211    lookup_predeclared_schema_by_id(id)
212}
213
214/// Create a `KindedSlot` carrying a `HeapValue::TypedObject` from a list
215/// of `(name, KindedSlot)` field pairs.
216///
217/// Per ADR-006 §2.7.4 audit-accuracy ruling + §2.7.3 N9 cleanup
218/// pre-flag, the previous `nb_to_slot` body relied on tag-bit dispatch
219/// via `value.is_heap()` / `value.raw_bits()` / `value.as_heap_ref()`
220/// / `value.as_any_array().to_generic()` (the forbidden N9
221/// tag-decoding pattern). The kind-threaded rebuild reads each pair's
222/// `NativeKind` from the `KindedSlot::kind` field (single source of
223/// truth) and dispatches per-kind to the matching per-FieldType
224/// `ValueSlot::from_*` constructor — no heap materialization, no
225/// `is_heap()` consultation. The slot's strong-count share is moved
226/// into the typed-object's slot list (the caller's `KindedSlot::clone`
227/// bumped it on construction).
228pub fn typed_object_from_pairs(fields: &[(&str, KindedSlot)]) -> KindedSlot {
229    let field_names: Vec<&str> = fields.iter().map(|(name, _)| *name).collect();
230    let schema = lookup_schema_for_fields(&field_names).unwrap_or_else(|| {
231        panic!(
232            "Missing predeclared schema for fields [{}]. Runtime schema synthesis is disabled.",
233            field_names.join(", ")
234        )
235    });
236    let value_by_name: HashMap<&str, &KindedSlot> =
237        fields.iter().map(|(name, value)| (*name, value)).collect();
238
239    // Build slots — `NativeKind` selects the per-FieldType constructor.
240    // Heap arms set the heap_mask bit; inline-scalar arms do not.
241    let mut slots = Vec::with_capacity(schema.fields.len());
242    let mut field_kinds: Vec<NativeKind> = Vec::with_capacity(schema.fields.len());
243    let mut heap_mask: u64 = 0;
244    for (i, field_def) in schema.fields.iter().enumerate() {
245        let value = value_by_name
246            .get(field_def.name.as_str())
247            .unwrap_or_else(|| {
248                panic!(
249                    "Missing field '{}' while materializing typed object",
250                    field_def.name
251                )
252            });
253        // `KindedSlot::clone` bumps the heap refcount; the resulting
254        // `ValueSlot` owns one strong-count share independent of the
255        // input pair's share. The bits transfer is a memcpy of the raw
256        // u64; the explicit `clone()` does the per-kind retain.
257        let cloned = (*value).clone();
258        let bits = cloned.slot().raw();
259        let kind = cloned.kind();
260        let is_heap = match kind {
261            NativeKind::String | NativeKind::Ptr(_) => true,
262            _ => false,
263        };
264        // Forget the cloned `KindedSlot` so its `Drop` does not
265        // decrement the share we just transferred into the slot list.
266        std::mem::forget(cloned);
267        let slot = ValueSlot::from_raw(bits);
268        slots.push(slot);
269        field_kinds.push(kind);
270        if is_heap {
271            heap_mask |= 1u64 << i;
272        }
273    }
274
275    // Wave 2 Round 4 D4 ckpt-1: migrated from
276    // `Arc::new(TypedObjectStorage::new(...))` to v2-raw `_new`
277    // returning `*mut TypedObjectStorage`; slot constructed via
278    // `from_typed_object_raw` per D1 API surface. The legacy
279    // `Arc<TypedObjectStorage>` carrier through `HeapValue::TypedObject`
280    // remains the variant signature until ckpt-final atomic flip; the
281    // intermediate type-witness check is dropped because `_new` returns
282    // a raw pointer rather than an `Arc` (cargo check expected broken
283    // here until the variant signature flips in ckpt-final lockstep).
284    let ptr = shape_value::TypedObjectStorage::_new(
285        schema.id as u64,
286        slots.into_boxed_slice(),
287        heap_mask,
288        Arc::from(field_kinds.into_boxed_slice()),
289    );
290    KindedSlot::new(
291        ValueSlot::from_typed_object_raw(ptr),
292        NativeKind::Ptr(HeapKind::TypedObject),
293    )
294}
295
#[cfg(test)]
mod tests {
    // This module is intentionally empty for now.
    //
    // Pre-bulldozer tests of `typed_object_from_pairs` /
    // `typed_object_to_hashmap_nb` decoded slots via `ValueWord`'s
    // `.as_str()` / `.as_i64()` methods. Phase 1.B retires those
    // accessors with the rest of `ValueWord`. Behavioural coverage of
    // typed-object construction returns when shape-vm Cluster #4 lands
    // its kind-threaded slot tests.
}
305
/// Convert a TypedObject `KindedSlot` back to a `HashMap<String, KindedSlot>`.
///
/// Intended as the inverse of [`typed_object_from_pairs`].
///
/// **Currently a stub:** the argument is ignored and the function always
/// returns `None`, regardless of whether the value is a TypedObject or
/// whether its schema is registered.
///
/// Phase 1.B (ADR-006 §2.7.4 audit-accuracy ruling): the per-slot
/// `NativeKind` is to be derived from the schema's `FieldType`. Phase 2c
/// lands schema → `NativeKind` lowering as a shared utility; the real
/// conversion is deferred until then.
pub fn typed_object_to_hashmap_nb(
    _value: &KindedSlot,
) -> Option<HashMap<String, KindedSlot>> {
    // Phase 1.B: schema → NativeKind lowering is the deferred Phase 2c
    // utility. This helper's pre-bulldozer body decoded slots via
    // `slots[i].as_heap_nb()` / `ValueWord::clone_from_bits` (now
    // deleted). Returning `None` keeps callers honest until the kind-
    // threaded rebuild lands; the only current consumer is the deleted
    // unit test above.
    None
}
327