Skip to main content

shape_runtime/type_schema/
registry.rs

1//! Type schema registry and builder
2//!
3//! This module provides the shared registry for type schemas and a fluent
4//! builder API for creating schemas.
5
6use super::SchemaId;
7use super::enum_support::EnumVariantInfo;
8use super::field_types::{FieldAnnotation, FieldType};
9use super::schema::TypeSchema;
10use std::collections::HashMap;
11
12/// Global registry of type schemas
13#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize)]
14pub struct TypeSchemaRegistry {
15    /// Schemas indexed by name
16    by_name: HashMap<String, TypeSchema>,
17    /// Schemas indexed by ID for fast runtime lookup
18    by_id: HashMap<SchemaId, String>,
19}
20
21impl TypeSchemaRegistry {
22    /// Create a new empty registry
23    pub fn new() -> Self {
24        Self::default()
25    }
26
27    /// Register a type schema
28    pub fn register(&mut self, schema: TypeSchema) {
29        let name = schema.name.clone();
30        let id = schema.id;
31        self.by_id.insert(id, name.clone());
32        self.by_name.insert(name, schema);
33    }
34
35    /// Register a type with field definitions
36    pub fn register_type(
37        &mut self,
38        name: impl Into<String>,
39        fields: Vec<(String, FieldType)>,
40    ) -> SchemaId {
41        let schema = TypeSchema::new(name, fields);
42        let id = schema.id;
43        self.register(schema);
44        id
45    }
46
47    /// Register a type with field definitions and per-field annotations.
48    ///
49    /// Each entry in `field_annotations` corresponds to the field at the same
50    /// index in `fields`. Annotations such as `@alias("wire_name")` are stored
51    /// on the resulting `FieldDef` so that serialization and deserialization
52    /// boundaries can use `wire_name()` instead of the field name.
53    pub fn register_type_with_annotations(
54        &mut self,
55        name: impl Into<String>,
56        fields: Vec<(String, FieldType)>,
57        field_annotations: Vec<Vec<FieldAnnotation>>,
58    ) -> SchemaId {
59        let mut schema = TypeSchema::new(name, fields);
60        for (i, annotations) in field_annotations.into_iter().enumerate() {
61            if i < schema.fields.len() && !annotations.is_empty() {
62                schema.fields[i].annotations = annotations;
63            }
64        }
65        let id = schema.id;
66        self.register(schema);
67        id
68    }
69
70    /// Get schema by name
71    pub fn get(&self, name: &str) -> Option<&TypeSchema> {
72        self.by_name.get(name)
73    }
74
75    /// Get schema by ID
76    pub fn get_by_id(&self, id: SchemaId) -> Option<&TypeSchema> {
77        self.by_id.get(&id).and_then(|name| self.by_name.get(name))
78    }
79
80    /// Highest schema ID currently stored in this registry.
81    pub fn max_schema_id(&self) -> Option<SchemaId> {
82        self.by_id.keys().copied().max()
83    }
84
85    /// Get field offset for a type/field combination
86    pub fn field_offset(&self, type_name: &str, field_name: &str) -> Option<usize> {
87        self.get(type_name)?.field_offset(field_name)
88    }
89
90    /// Check if a type is registered
91    pub fn has_type(&self, name: &str) -> bool {
92        self.by_name.contains_key(name)
93    }
94
95    /// Number of registered types
96    pub fn type_count(&self) -> usize {
97        self.by_name.len()
98    }
99
100    /// Iterator over all registered type names
101    pub fn type_names(&self) -> impl Iterator<Item = &str> {
102        self.by_name.keys().map(|s| s.as_str())
103    }
104
105    /// Create a registry with common stdlib types pre-registered
106    pub fn with_stdlib_types() -> Self {
107        let mut registry = Self::new();
108
109        // Register Row type (generic data row)
110        registry.register_type(
111            "Row",
112            vec![
113                ("timestamp".to_string(), FieldType::Timestamp),
114                ("fields".to_string(), FieldType::Any), // Dynamic fields
115            ],
116        );
117
118        // Register Option enum type
119        registry.register(TypeSchema::new_enum(
120            "Option",
121            vec![
122                EnumVariantInfo::new("Some", 0, 1), // Some(T) has 1 payload field
123                EnumVariantInfo::new("None", 1, 0), // None has no payload
124            ],
125        ));
126
127        // Register Result enum type
128        registry.register(TypeSchema::new_enum(
129            "Result",
130            vec![
131                EnumVariantInfo::new("Ok", 0, 1),  // Ok(T) has 1 payload field
132                EnumVariantInfo::new("Err", 1, 1), // Err(E) has 1 payload field
133            ],
134        ));
135
136        // Register builtin fixed-layout schemas (AnyError, TraceFrame, etc.)
137        super::builtin_schemas::register_builtin_schemas(&mut registry);
138
139        // Note: Domain-specific types (Candle, Trade, etc.) should be
140        // registered by the domain-specific stdlib, not here in core.
141
142        registry
143    }
144
145    /// Create a registry with stdlib types and return both registry and builtin IDs.
146    pub fn with_stdlib_types_and_builtin_ids() -> (Self, super::builtin_schemas::BuiltinSchemaIds) {
147        let mut registry = Self::new();
148
149        // Register Row type
150        registry.register_type(
151            "Row",
152            vec![
153                ("timestamp".to_string(), FieldType::Timestamp),
154                ("fields".to_string(), FieldType::Any),
155            ],
156        );
157
158        // Register Option/Result enum types
159        registry.register(TypeSchema::new_enum(
160            "Option",
161            vec![
162                EnumVariantInfo::new("Some", 0, 1),
163                EnumVariantInfo::new("None", 1, 0),
164            ],
165        ));
166        registry.register(TypeSchema::new_enum(
167            "Result",
168            vec![
169                EnumVariantInfo::new("Ok", 0, 1),
170                EnumVariantInfo::new("Err", 1, 1),
171            ],
172        ));
173
174        // Register builtin schemas and capture IDs
175        let ids = super::builtin_schemas::register_builtin_schemas(&mut registry);
176
177        (registry, ids)
178    }
179
180    /// Compute content hashes for all registered schemas.
181    pub fn compute_all_hashes(&mut self) {
182        for schema in self.by_name.values_mut() {
183            schema.content_hash();
184        }
185    }
186
187    /// Look up a schema by its content hash.
188    ///
189    /// Returns the first schema whose cached or computed content hash matches.
190    /// For best performance, call `compute_all_hashes` first.
191    pub fn get_by_content_hash(&self, hash: &[u8; 32]) -> Option<&TypeSchema> {
192        self.by_name.values().find(|schema| {
193            // Use cached hash if available, otherwise compute on the fly
194            let schema_hash = match schema.content_hash {
195                Some(h) => h,
196                None => schema.compute_content_hash(),
197            };
198            &schema_hash == hash
199        })
200    }
201
202    /// Merge another registry into this one
203    ///
204    /// Schemas from `other` are added to this registry. If a schema with the
205    /// same name already exists, it is NOT overwritten (first registration wins).
206    pub fn merge(&mut self, other: TypeSchemaRegistry) {
207        for (name, schema) in other.by_name {
208            if !self.by_name.contains_key(&name) {
209                let id = schema.id;
210                self.by_id.insert(id, name.clone());
211                self.by_name.insert(name, schema);
212            }
213        }
214    }
215}
216
217impl shape_value::external_value::SchemaLookup for TypeSchemaRegistry {
218    fn type_name(&self, schema_id: u64) -> Option<&str> {
219        self.get_by_id(schema_id as SchemaId)
220            .map(|s| s.name.as_str())
221    }
222
223    fn field_names(&self, schema_id: u64) -> Option<Vec<&str>> {
224        self.get_by_id(schema_id as SchemaId)
225            .map(|s| s.fields.iter().map(|f| f.name.as_str()).collect())
226    }
227}
228
229/// Builder for creating type schemas fluently
230pub struct TypeSchemaBuilder {
231    name: String,
232    fields: Vec<(String, FieldType)>,
233    field_meta: Vec<Vec<FieldAnnotation>>,
234}
235
236impl TypeSchemaBuilder {
237    /// Start building a new type schema
238    pub fn new(name: impl Into<String>) -> Self {
239        Self {
240            name: name.into(),
241            fields: Vec::new(),
242            field_meta: Vec::new(),
243        }
244    }
245
246    /// Add a f64 field
247    pub fn f64_field(mut self, name: impl Into<String>) -> Self {
248        self.fields.push((name.into(), FieldType::F64));
249        self.field_meta.push(vec![]);
250        self
251    }
252
253    /// Add an i64 field
254    pub fn i64_field(mut self, name: impl Into<String>) -> Self {
255        self.fields.push((name.into(), FieldType::I64));
256        self.field_meta.push(vec![]);
257        self
258    }
259
260    /// Add a decimal field (stored as f64, reconstructed as Decimal on read)
261    pub fn decimal_field(mut self, name: impl Into<String>) -> Self {
262        self.fields.push((name.into(), FieldType::Decimal));
263        self.field_meta.push(vec![]);
264        self
265    }
266
267    /// Add a boolean field
268    pub fn bool_field(mut self, name: impl Into<String>) -> Self {
269        self.fields.push((name.into(), FieldType::Bool));
270        self.field_meta.push(vec![]);
271        self
272    }
273
274    /// Add a string field
275    pub fn string_field(mut self, name: impl Into<String>) -> Self {
276        self.fields.push((name.into(), FieldType::String));
277        self.field_meta.push(vec![]);
278        self
279    }
280
281    /// Add a timestamp field
282    pub fn timestamp_field(mut self, name: impl Into<String>) -> Self {
283        self.fields.push((name.into(), FieldType::Timestamp));
284        self.field_meta.push(vec![]);
285        self
286    }
287
288    /// Add a nested object field
289    pub fn object_field(mut self, name: impl Into<String>, type_name: impl Into<String>) -> Self {
290        self.fields
291            .push((name.into(), FieldType::Object(type_name.into())));
292        self.field_meta.push(vec![]);
293        self
294    }
295
296    /// Add an array field
297    pub fn array_field(mut self, name: impl Into<String>, element_type: FieldType) -> Self {
298        self.fields
299            .push((name.into(), FieldType::Array(Box::new(element_type))));
300        self.field_meta.push(vec![]);
301        self
302    }
303
304    /// Add a dynamic/any field
305    pub fn any_field(mut self, name: impl Into<String>) -> Self {
306        self.fields.push((name.into(), FieldType::Any));
307        self.field_meta.push(vec![]);
308        self
309    }
310
311    /// Add a field with annotation metadata
312    pub fn field_with_meta(
313        mut self,
314        name: impl Into<String>,
315        field_type: FieldType,
316        annotations: Vec<FieldAnnotation>,
317    ) -> Self {
318        self.fields.push((name.into(), field_type));
319        self.field_meta.push(annotations);
320        self
321    }
322
323    /// Build the type schema
324    pub fn build(self) -> TypeSchema {
325        let mut schema = TypeSchema::new(self.name, self.fields);
326        // Apply annotations to fields
327        for (i, annotations) in self.field_meta.into_iter().enumerate() {
328            if i < schema.fields.len() {
329                schema.fields[i].annotations = annotations;
330            }
331        }
332        schema
333    }
334
335    /// Build and register in a registry
336    pub fn register(self, registry: &mut TypeSchemaRegistry) -> SchemaId {
337        let schema = self.build();
338        let id = schema.id;
339        registry.register(schema);
340        id
341    }
342}
343
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every `(field, offset)` pair matches the schema layout.
    fn assert_offsets(schema: &TypeSchema, expected: &[(&str, usize)]) {
        for &(field, offset) in expected {
            assert_eq!(schema.field_offset(field), Some(offset));
        }
    }

    #[test]
    fn test_registry() {
        let mut registry = TypeSchemaRegistry::new();
        let fields = vec![
            ("x".to_string(), FieldType::F64),
            ("y".to_string(), FieldType::F64),
        ];
        let schema_id = registry.register_type("MyType", fields);

        assert!(registry.has_type("MyType"));
        assert!(!registry.has_type("OtherType"));

        // Lookup by name
        let by_name = registry.get("MyType").unwrap();
        assert_eq!(by_name.id, schema_id);
        assert_eq!(by_name.field_count(), 2);

        // Lookup by ID
        let by_id = registry.get_by_id(schema_id).unwrap();
        assert_eq!(by_id.name, "MyType");
    }

    #[test]
    fn test_builder() {
        let mut registry = TypeSchemaRegistry::new();
        let schema_id = TypeSchemaBuilder::new("Point")
            .f64_field("x")
            .f64_field("y")
            .f64_field("z")
            .register(&mut registry);

        let point = registry.get_by_id(schema_id).unwrap();
        assert_eq!(point.name, "Point");
        assert_eq!(point.field_count(), 3);
        assert_offsets(point, &[("x", 0), ("y", 8), ("z", 16)]);
    }

    #[test]
    fn test_stdlib_types() {
        let registry = TypeSchemaRegistry::with_stdlib_types();

        assert!(registry.has_type("Row"));
        let row = registry.get("Row").unwrap();
        assert!(row.has_field("timestamp"));
    }

    #[test]
    fn test_ohlcv_schema() {
        // Example: registering an OHLCV-like type (would be done by finance stdlib)
        let mut registry = TypeSchemaRegistry::new();
        TypeSchemaBuilder::new("Candle")
            .timestamp_field("timestamp")
            .f64_field("open")
            .f64_field("high")
            .f64_field("low")
            .f64_field("close")
            .f64_field("volume")
            .register(&mut registry);

        let candle = registry.get("Candle").unwrap();
        assert_eq!(candle.field_count(), 6);
        assert_eq!(candle.data_size, 48); // 6 * 8 bytes

        // Offsets are sequential, 8 bytes apart
        assert_offsets(
            candle,
            &[
                ("timestamp", 0),
                ("open", 8),
                ("high", 16),
                ("low", 24),
                ("close", 32),
                ("volume", 40),
            ],
        );
    }

    #[test]
    fn test_stdlib_enum_types() {
        let registry = TypeSchemaRegistry::with_stdlib_types();

        // Option is registered as an enum with Some = 0, None = 1
        assert!(registry.has_type("Option"));
        let option = registry.get("Option").unwrap();
        assert!(option.is_enum());
        assert_eq!(option.variant_id("Some"), Some(0));
        assert_eq!(option.variant_id("None"), Some(1));

        // Result is registered as an enum with Ok = 0, Err = 1
        assert!(registry.has_type("Result"));
        let result = registry.get("Result").unwrap();
        assert!(result.is_enum());
        assert_eq!(result.variant_id("Ok"), Some(0));
        assert_eq!(result.variant_id("Err"), Some(1));
    }

    #[test]
    fn test_max_schema_id() {
        let mut registry = TypeSchemaRegistry::new();
        let first = registry.register_type("A", vec![("x".to_string(), FieldType::F64)]);
        let second = registry.register_type("B", vec![("y".to_string(), FieldType::F64)]);
        assert_eq!(registry.max_schema_id(), Some(first.max(second)));
    }
}