Skip to main content

shape_runtime/type_schema/
registry.rs

1//! Type schema registry and builder
2//!
3//! This module provides the shared registry for type schemas and a fluent
4//! builder API for creating schemas.
5
6use super::SchemaId;
7use super::enum_support::EnumVariantInfo;
8use super::field_types::{FieldAnnotation, FieldType};
9use super::schema::TypeSchema;
10use std::collections::HashMap;
11
12/// Global registry of type schemas
13#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize)]
14pub struct TypeSchemaRegistry {
15    /// Schemas indexed by name
16    by_name: HashMap<String, TypeSchema>,
17    /// Schemas indexed by ID for fast runtime lookup
18    by_id: HashMap<SchemaId, String>,
19}
20
21impl TypeSchemaRegistry {
22    /// Create a new empty registry
23    pub fn new() -> Self {
24        Self::default()
25    }
26
27    /// Register a type schema
28    pub fn register(&mut self, schema: TypeSchema) {
29        let name = schema.name.clone();
30        let id = schema.id;
31        self.by_id.insert(id, name.clone());
32        self.by_name.insert(name, schema);
33    }
34
35    /// Register a type with field definitions
36    pub fn register_type(
37        &mut self,
38        name: impl Into<String>,
39        fields: Vec<(String, FieldType)>,
40    ) -> SchemaId {
41        let schema = TypeSchema::new(name, fields);
42        let id = schema.id;
43        self.register(schema);
44        id
45    }
46
47    /// Get schema by name
48    pub fn get(&self, name: &str) -> Option<&TypeSchema> {
49        self.by_name.get(name)
50    }
51
52    /// Get schema by ID
53    pub fn get_by_id(&self, id: SchemaId) -> Option<&TypeSchema> {
54        self.by_id.get(&id).and_then(|name| self.by_name.get(name))
55    }
56
57    /// Highest schema ID currently stored in this registry.
58    pub fn max_schema_id(&self) -> Option<SchemaId> {
59        self.by_id.keys().copied().max()
60    }
61
62    /// Get field offset for a type/field combination
63    pub fn field_offset(&self, type_name: &str, field_name: &str) -> Option<usize> {
64        self.get(type_name)?.field_offset(field_name)
65    }
66
67    /// Check if a type is registered
68    pub fn has_type(&self, name: &str) -> bool {
69        self.by_name.contains_key(name)
70    }
71
72    /// Number of registered types
73    pub fn type_count(&self) -> usize {
74        self.by_name.len()
75    }
76
77    /// Iterator over all registered type names
78    pub fn type_names(&self) -> impl Iterator<Item = &str> {
79        self.by_name.keys().map(|s| s.as_str())
80    }
81
82    /// Create a registry with common stdlib types pre-registered
83    pub fn with_stdlib_types() -> Self {
84        let mut registry = Self::new();
85
86        // Register Row type (generic data row)
87        registry.register_type(
88            "Row",
89            vec![
90                ("timestamp".to_string(), FieldType::Timestamp),
91                ("fields".to_string(), FieldType::Any), // Dynamic fields
92            ],
93        );
94
95        // Register Option enum type
96        registry.register(TypeSchema::new_enum(
97            "Option",
98            vec![
99                EnumVariantInfo::new("Some", 0, 1), // Some(T) has 1 payload field
100                EnumVariantInfo::new("None", 1, 0), // None has no payload
101            ],
102        ));
103
104        // Register Result enum type
105        registry.register(TypeSchema::new_enum(
106            "Result",
107            vec![
108                EnumVariantInfo::new("Ok", 0, 1),  // Ok(T) has 1 payload field
109                EnumVariantInfo::new("Err", 1, 1), // Err(E) has 1 payload field
110            ],
111        ));
112
113        // Register builtin fixed-layout schemas (AnyError, TraceFrame, etc.)
114        super::builtin_schemas::register_builtin_schemas(&mut registry);
115
116        // Note: Domain-specific types (Candle, Trade, etc.) should be
117        // registered by the domain-specific stdlib, not here in core.
118
119        registry
120    }
121
122    /// Create a registry with stdlib types and return both registry and builtin IDs.
123    pub fn with_stdlib_types_and_builtin_ids() -> (Self, super::builtin_schemas::BuiltinSchemaIds) {
124        let mut registry = Self::new();
125
126        // Register Row type
127        registry.register_type(
128            "Row",
129            vec![
130                ("timestamp".to_string(), FieldType::Timestamp),
131                ("fields".to_string(), FieldType::Any),
132            ],
133        );
134
135        // Register Option/Result enum types
136        registry.register(TypeSchema::new_enum(
137            "Option",
138            vec![
139                EnumVariantInfo::new("Some", 0, 1),
140                EnumVariantInfo::new("None", 1, 0),
141            ],
142        ));
143        registry.register(TypeSchema::new_enum(
144            "Result",
145            vec![
146                EnumVariantInfo::new("Ok", 0, 1),
147                EnumVariantInfo::new("Err", 1, 1),
148            ],
149        ));
150
151        // Register builtin schemas and capture IDs
152        let ids = super::builtin_schemas::register_builtin_schemas(&mut registry);
153
154        (registry, ids)
155    }
156
157    /// Compute content hashes for all registered schemas.
158    pub fn compute_all_hashes(&mut self) {
159        for schema in self.by_name.values_mut() {
160            schema.content_hash();
161        }
162    }
163
164    /// Look up a schema by its content hash.
165    ///
166    /// Returns the first schema whose cached or computed content hash matches.
167    /// For best performance, call `compute_all_hashes` first.
168    pub fn get_by_content_hash(&self, hash: &[u8; 32]) -> Option<&TypeSchema> {
169        self.by_name.values().find(|schema| {
170            // Use cached hash if available, otherwise compute on the fly
171            let schema_hash = match schema.content_hash {
172                Some(h) => h,
173                None => schema.compute_content_hash(),
174            };
175            &schema_hash == hash
176        })
177    }
178
179    /// Merge another registry into this one
180    ///
181    /// Schemas from `other` are added to this registry. If a schema with the
182    /// same name already exists, it is NOT overwritten (first registration wins).
183    pub fn merge(&mut self, other: TypeSchemaRegistry) {
184        for (name, schema) in other.by_name {
185            if !self.by_name.contains_key(&name) {
186                let id = schema.id;
187                self.by_id.insert(id, name.clone());
188                self.by_name.insert(name, schema);
189            }
190        }
191    }
192}
193
194impl shape_value::external_value::SchemaLookup for TypeSchemaRegistry {
195    fn type_name(&self, schema_id: u64) -> Option<&str> {
196        self.get_by_id(schema_id as SchemaId)
197            .map(|s| s.name.as_str())
198    }
199
200    fn field_names(&self, schema_id: u64) -> Option<Vec<&str>> {
201        self.get_by_id(schema_id as SchemaId)
202            .map(|s| s.fields.iter().map(|f| f.name.as_str()).collect())
203    }
204}
205
206/// Builder for creating type schemas fluently
207pub struct TypeSchemaBuilder {
208    name: String,
209    fields: Vec<(String, FieldType)>,
210    field_meta: Vec<Vec<FieldAnnotation>>,
211}
212
213impl TypeSchemaBuilder {
214    /// Start building a new type schema
215    pub fn new(name: impl Into<String>) -> Self {
216        Self {
217            name: name.into(),
218            fields: Vec::new(),
219            field_meta: Vec::new(),
220        }
221    }
222
223    /// Add a f64 field
224    pub fn f64_field(mut self, name: impl Into<String>) -> Self {
225        self.fields.push((name.into(), FieldType::F64));
226        self.field_meta.push(vec![]);
227        self
228    }
229
230    /// Add an i64 field
231    pub fn i64_field(mut self, name: impl Into<String>) -> Self {
232        self.fields.push((name.into(), FieldType::I64));
233        self.field_meta.push(vec![]);
234        self
235    }
236
237    /// Add a decimal field (stored as f64, reconstructed as Decimal on read)
238    pub fn decimal_field(mut self, name: impl Into<String>) -> Self {
239        self.fields.push((name.into(), FieldType::Decimal));
240        self.field_meta.push(vec![]);
241        self
242    }
243
244    /// Add a boolean field
245    pub fn bool_field(mut self, name: impl Into<String>) -> Self {
246        self.fields.push((name.into(), FieldType::Bool));
247        self.field_meta.push(vec![]);
248        self
249    }
250
251    /// Add a string field
252    pub fn string_field(mut self, name: impl Into<String>) -> Self {
253        self.fields.push((name.into(), FieldType::String));
254        self.field_meta.push(vec![]);
255        self
256    }
257
258    /// Add a timestamp field
259    pub fn timestamp_field(mut self, name: impl Into<String>) -> Self {
260        self.fields.push((name.into(), FieldType::Timestamp));
261        self.field_meta.push(vec![]);
262        self
263    }
264
265    /// Add a nested object field
266    pub fn object_field(mut self, name: impl Into<String>, type_name: impl Into<String>) -> Self {
267        self.fields
268            .push((name.into(), FieldType::Object(type_name.into())));
269        self.field_meta.push(vec![]);
270        self
271    }
272
273    /// Add an array field
274    pub fn array_field(mut self, name: impl Into<String>, element_type: FieldType) -> Self {
275        self.fields
276            .push((name.into(), FieldType::Array(Box::new(element_type))));
277        self.field_meta.push(vec![]);
278        self
279    }
280
281    /// Add a dynamic/any field
282    pub fn any_field(mut self, name: impl Into<String>) -> Self {
283        self.fields.push((name.into(), FieldType::Any));
284        self.field_meta.push(vec![]);
285        self
286    }
287
288    /// Add a field with annotation metadata
289    pub fn field_with_meta(
290        mut self,
291        name: impl Into<String>,
292        field_type: FieldType,
293        annotations: Vec<FieldAnnotation>,
294    ) -> Self {
295        self.fields.push((name.into(), field_type));
296        self.field_meta.push(annotations);
297        self
298    }
299
300    /// Build the type schema
301    pub fn build(self) -> TypeSchema {
302        let mut schema = TypeSchema::new(self.name, self.fields);
303        // Apply annotations to fields
304        for (i, annotations) in self.field_meta.into_iter().enumerate() {
305            if i < schema.fields.len() {
306                schema.fields[i].annotations = annotations;
307            }
308        }
309        schema
310    }
311
312    /// Build and register in a registry
313    pub fn register(self, registry: &mut TypeSchemaRegistry) -> SchemaId {
314        let schema = self.build();
315        let id = schema.id;
316        registry.register(schema);
317        id
318    }
319}
320
#[cfg(test)]
mod tests {
    use super::*;

    /// A registered type is reachable both by name and by ID.
    #[test]
    fn test_registry() {
        let mut reg = TypeSchemaRegistry::new();
        let fields = vec![
            ("x".to_string(), FieldType::F64),
            ("y".to_string(), FieldType::F64),
        ];
        let id = reg.register_type("MyType", fields);

        assert!(reg.has_type("MyType"));
        assert!(!reg.has_type("OtherType"));

        let by_name = reg.get("MyType").unwrap();
        assert_eq!(by_name.id, id);
        assert_eq!(by_name.field_count(), 2);

        // Lookup through the ID index must land on the same type.
        let by_id = reg.get_by_id(id).unwrap();
        assert_eq!(by_id.name, "MyType");
    }

    /// The fluent builder registers fields in order with sequential offsets.
    #[test]
    fn test_builder() {
        let mut reg = TypeSchemaRegistry::new();
        let id = TypeSchemaBuilder::new("Point")
            .f64_field("x")
            .f64_field("y")
            .f64_field("z")
            .register(&mut reg);

        let point = reg.get_by_id(id).unwrap();
        assert_eq!(point.name, "Point");
        assert_eq!(point.field_count(), 3);
        for (field, offset) in [("x", 0usize), ("y", 8), ("z", 16)] {
            assert_eq!(point.field_offset(field), Some(offset));
        }
    }

    /// The stdlib registry ships the generic `Row` type.
    #[test]
    fn test_stdlib_types() {
        let reg = TypeSchemaRegistry::with_stdlib_types();

        assert!(reg.has_type("Row"));
        let row = reg.get("Row").unwrap();
        assert!(row.has_field("timestamp"));
    }

    /// Example of a domain type (OHLCV candle) as a finance stdlib would
    /// register it.
    #[test]
    fn test_ohlcv_schema() {
        let mut reg = TypeSchemaRegistry::new();
        TypeSchemaBuilder::new("Candle")
            .timestamp_field("timestamp")
            .f64_field("open")
            .f64_field("high")
            .f64_field("low")
            .f64_field("close")
            .f64_field("volume")
            .register(&mut reg);

        let candle = reg.get("Candle").unwrap();
        assert_eq!(candle.field_count(), 6);
        assert_eq!(candle.data_size, 48); // 6 * 8 bytes

        // Offsets advance by 8 bytes per field, in declaration order.
        let expected_offsets = [
            ("timestamp", 0usize),
            ("open", 8),
            ("high", 16),
            ("low", 24),
            ("close", 32),
            ("volume", 40),
        ];
        for (field, offset) in expected_offsets {
            assert_eq!(candle.field_offset(field), Some(offset));
        }
    }

    /// `Option` and `Result` are pre-registered as enums with stable variant
    /// IDs.
    #[test]
    fn test_stdlib_enum_types() {
        let reg = TypeSchemaRegistry::with_stdlib_types();

        assert!(reg.has_type("Option"));
        let option = reg.get("Option").unwrap();
        assert!(option.is_enum());
        assert_eq!(option.variant_id("Some"), Some(0));
        assert_eq!(option.variant_id("None"), Some(1));

        assert!(reg.has_type("Result"));
        let result = reg.get("Result").unwrap();
        assert!(result.is_enum());
        assert_eq!(result.variant_id("Ok"), Some(0));
        assert_eq!(result.variant_id("Err"), Some(1));
    }

    /// `max_schema_id` reports the larger of the registered IDs.
    #[test]
    fn test_max_schema_id() {
        let mut reg = TypeSchemaRegistry::new();
        let first = reg.register_type("A", vec![("x".to_string(), FieldType::F64)]);
        let second = reg.register_type("B", vec![("y".to_string(), FieldType::F64)]);
        assert_eq!(reg.max_schema_id(), Some(first.max(second)));
    }
}