lex_bytecode/shape_registry.rs
1//! Process-global record-shape registry (#462 slice 3).
2//!
3//! Maps sorted field-name lists to stable `u32` shape IDs for records
4//! built at runtime — JSON decode, SQL row, HTTP body, host effect
5//! handlers, builtin returns. Anywhere `Value::record_dynamic` is
6//! called.
7//!
8//! Why: the slice-2b measurement (`docs/design/ic-polymorphism-measurement.md`)
9//! found 14% of GetField IC hits — and 100% of inbox/gateway/std_http
10//! traffic — landed on records carrying `NO_SHAPE_ID`. Those records
11//! all aliased on the sentinel, so the IC's shape-keyed verification
12//! (added in #517) couldn't distinguish them and fell through to the
13//! name-compare path on every hit. With this registry, dynamic records
14//! built from the same field set share a real `shape_id` and the IC
15//! hits cleanly amongst them.
16//!
17//! ## Disjoint ID spaces
18//!
19//! Compile-time `Op::MakeRecord` shape IDs come from
20//! `Program::record_shapes` and are per-program indices (0, 1, 2, …).
21//! Runtime registry IDs come from this module and live in the high
22//! half of the `u32` range starting at [`DYNAMIC_SHAPE_ID_BASE`].
23//! `NO_SHAPE_ID` (`u32::MAX`) is reserved.
24//!
25//! Disjoint spaces are required for IC correctness: the shape-keyed
26//! verifier in `vm.rs` treats `cached_shape == incoming_shape` as
27//! "offset is sound" for non-`NO_SHAPE_ID` cases. If a compile-time
28//! ID collided with a dynamic ID for a different field set, the IC
29//! would return values from the wrong field.
30//!
31//! ## Cost
32//!
33//! One `RwLock<IndexMap>` lookup per `record_dynamic` call. The map
34//! grows monotonically (shapes are never freed) but is small in
35//! practice — tens of distinct shapes per workload. Lookups are
36//! read-lock + hash; misses take the write lock once per new shape.
37//!
38//! ## Sorting
39//!
40//! The key is the sorted field-name vec — two records with the same
41//! fields in different insertion order share a `shape_id`. Matches
42//! the existing `Value::Record` `PartialEq` (which compares `IndexMap`
43//! contents structurally, ignoring `shape_id`), so equality and IC
44//! sharing line up.
45
46use indexmap::IndexMap;
47use std::sync::{OnceLock, RwLock};
48
49/// Base for dynamically-interned shape IDs. Chosen so that
50/// `Program::record_shapes` indices (which start at 0 and grow up)
51/// cannot collide with dynamic IDs even for pathologically large
52/// programs. Compile-time records will hit a different IC slot
53/// outcome than dynamic records with the same field set — that's
54/// the cost of keeping ID spaces disjoint; the slice-2b
55/// measurement found 0 mixed-flavor sites in real workloads.
56pub const DYNAMIC_SHAPE_ID_BASE: u32 = 0x8000_0000;
57
58fn registry() -> &'static RwLock<IndexMap<Vec<String>, u32>> {
59 static REGISTRY: OnceLock<RwLock<IndexMap<Vec<String>, u32>>> = OnceLock::new();
60 REGISTRY.get_or_init(|| RwLock::new(IndexMap::new()))
61}
62
63/// Look up (or assign) the shape ID for the given field-name list.
64/// Same set of names — in any insertion order — yields the same ID
65/// for the lifetime of the process.
66pub fn intern(field_names: impl IntoIterator<Item = impl AsRef<str>>) -> u32 {
67 let mut key: Vec<String> = field_names.into_iter().map(|s| s.as_ref().to_string()).collect();
68 key.sort();
69 // Fast path: read-only lookup.
70 if let Some(&id) = registry().read().unwrap().get(&key) {
71 return id;
72 }
73 // Slow path: write-lock, re-check, insert.
74 let mut w = registry().write().unwrap();
75 if let Some(&id) = w.get(&key) {
76 return id;
77 }
78 let id = DYNAMIC_SHAPE_ID_BASE + w.len() as u32;
79 w.insert(key, id);
80 id
81}
82
83#[cfg(test)]
84mod tests {
85 use super::*;
86
87 #[test]
88 fn same_fields_yield_same_id_regardless_of_order() {
89 let a = intern(["x", "y", "z"]);
90 let b = intern(["z", "y", "x"]);
91 let c = intern(["y", "x", "z"]);
92 assert_eq!(a, b);
93 assert_eq!(b, c);
94 }
95
96 #[test]
97 fn distinct_fields_yield_distinct_ids() {
98 let a = intern(["a", "b"]);
99 let b = intern(["a", "c"]);
100 assert_ne!(a, b);
101 }
102
103 #[test]
104 fn ids_are_in_the_dynamic_range() {
105 let id = intern(["only_for_this_test"]);
106 assert!(id >= DYNAMIC_SHAPE_ID_BASE);
107 assert!(id < u32::MAX);
108 }
109}