Skip to main content

apiary_core/
registry.rs

//! Registry types for namespace management (Hives, Boxes, Frames).
//!
//! The registry is the catalog of all namespaces: which hives, boxes, and frames exist,
//! along with their metadata. It is persisted as versioned JSON files in object storage,
//! using conditional writes to serialize concurrent updates.
6
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10
11/// A versioned registry of all hives, boxes, and frames in the system.
12#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
13pub struct Registry {
14    /// Monotonically increasing version number.
15    pub version: u64,
16
17    /// All hives in the registry.
18    pub hives: HashMap<String, Hive>,
19}
20
21impl Registry {
22    /// Create a new empty registry at version 1.
23    pub fn new() -> Self {
24        Self {
25            version: 1,
26            hives: HashMap::new(),
27        }
28    }
29
30    /// Create a registry with a specific version.
31    pub fn with_version(version: u64) -> Self {
32        Self {
33            version,
34            hives: HashMap::new(),
35        }
36    }
37
38    /// Get the next version number.
39    pub fn next_version(&self) -> u64 {
40        self.version + 1
41    }
42
43    /// Check if a hive exists.
44    pub fn has_hive(&self, hive_name: &str) -> bool {
45        self.hives.contains_key(hive_name)
46    }
47
48    /// Get a hive by name.
49    pub fn get_hive(&self, hive_name: &str) -> Option<&Hive> {
50        self.hives.get(hive_name)
51    }
52
53    /// Get a mutable reference to a hive.
54    pub fn get_hive_mut(&mut self, hive_name: &str) -> Option<&mut Hive> {
55        self.hives.get_mut(hive_name)
56    }
57
58    /// Check if a box exists within a hive.
59    pub fn has_box(&self, hive_name: &str, box_name: &str) -> bool {
60        self.hives
61            .get(hive_name)
62            .map(|h| h.boxes.contains_key(box_name))
63            .unwrap_or(false)
64    }
65
66    /// Check if a frame exists within a box.
67    pub fn has_frame(&self, hive_name: &str, box_name: &str, frame_name: &str) -> bool {
68        self.hives
69            .get(hive_name)
70            .and_then(|h| h.boxes.get(box_name))
71            .map(|b| b.frames.contains_key(frame_name))
72            .unwrap_or(false)
73    }
74
75    /// Get a frame by its full path.
76    pub fn get_frame(&self, hive_name: &str, box_name: &str, frame_name: &str) -> Option<&Frame> {
77        self.hives
78            .get(hive_name)
79            .and_then(|h| h.boxes.get(box_name))
80            .and_then(|b| b.frames.get(frame_name))
81    }
82}
83
84impl Default for Registry {
85    fn default() -> Self {
86        Self::new()
87    }
88}
89
90/// A Hive represents a top-level namespace (database).
91#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
92pub struct Hive {
93    /// All boxes (schemas) within this hive.
94    pub boxes: HashMap<String, Box>,
95
96    /// When this hive was created.
97    pub created_at: DateTime<Utc>,
98
99    /// Custom properties for this hive.
100    #[serde(default)]
101    pub properties: HashMap<String, String>,
102}
103
104impl Hive {
105    /// Create a new hive.
106    pub fn new() -> Self {
107        Self {
108            boxes: HashMap::new(),
109            created_at: Utc::now(),
110            properties: HashMap::new(),
111        }
112    }
113}
114
115impl Default for Hive {
116    fn default() -> Self {
117        Self::new()
118    }
119}
120
121/// A Box represents a schema within a hive.
122#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
123pub struct Box {
124    /// All frames (tables) within this box.
125    pub frames: HashMap<String, Frame>,
126
127    /// When this box was created.
128    pub created_at: DateTime<Utc>,
129
130    /// Custom properties for this box.
131    #[serde(default)]
132    pub properties: HashMap<String, String>,
133}
134
135impl Box {
136    /// Create a new box.
137    pub fn new() -> Self {
138        Self {
139            frames: HashMap::new(),
140            created_at: Utc::now(),
141            properties: HashMap::new(),
142        }
143    }
144}
145
146impl Default for Box {
147    fn default() -> Self {
148        Self::new()
149    }
150}
151
152/// A Frame represents a table within a box.
153#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
154pub struct Frame {
155    /// Schema of the frame (Arrow schema JSON).
156    pub schema: serde_json::Value,
157
158    /// Columns to partition by.
159    #[serde(default)]
160    pub partition_by: Vec<String>,
161
162    /// When this frame was created.
163    pub created_at: DateTime<Utc>,
164
165    /// Maximum number of partitions allowed (default: 10,000).
166    #[serde(default = "default_max_partitions")]
167    pub max_partitions: u32,
168
169    /// Custom properties for this frame.
170    #[serde(default)]
171    pub properties: HashMap<String, String>,
172}
173
/// Default partition limit for a frame.
///
/// Referenced by name from the `#[serde(default = "...")]` attribute on
/// `Frame::max_partitions` and used by the `Frame` constructors.
fn default_max_partitions() -> u32 {
    const DEFAULT_MAX_PARTITIONS: u32 = 10_000;
    DEFAULT_MAX_PARTITIONS
}
177
178impl Frame {
179    /// Create a new frame with a schema.
180    pub fn new(schema: serde_json::Value) -> Self {
181        Self {
182            schema,
183            partition_by: Vec::new(),
184            created_at: Utc::now(),
185            max_partitions: default_max_partitions(),
186            properties: HashMap::new(),
187        }
188    }
189
190    /// Create a new frame with schema and partitioning.
191    pub fn with_partitioning(schema: serde_json::Value, partition_by: Vec<String>) -> Self {
192        Self {
193            schema,
194            partition_by,
195            created_at: Utc::now(),
196            max_partitions: default_max_partitions(),
197            properties: HashMap::new(),
198        }
199    }
200}
201
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Fixture: a registry containing `analytics` / `sensors` / `temperature`,
    /// where the frame carries the supplied schema.
    fn registry_with_temperature_frame(schema: serde_json::Value) -> Registry {
        let mut sensors = Box::new();
        sensors
            .frames
            .insert("temperature".to_string(), Frame::new(schema));

        let mut analytics = Hive::new();
        analytics.boxes.insert("sensors".to_string(), sensors);

        let mut registry = Registry::new();
        registry.hives.insert("analytics".to_string(), analytics);
        registry
    }

    // --- Registry construction and versioning ---

    #[test]
    fn test_registry_new() {
        let fresh = Registry::new();
        assert_eq!(fresh.version, 1);
        assert!(fresh.hives.is_empty());
    }

    #[test]
    fn test_registry_next_version() {
        let at_five = Registry::with_version(5);
        assert_eq!(at_five.next_version(), 6);
    }

    // --- Registry lookups ---

    #[test]
    fn test_registry_has_hive() {
        let mut registry = Registry::new();
        assert!(!registry.has_hive("analytics"));

        registry.hives.insert("analytics".to_string(), Hive::new());
        assert!(registry.has_hive("analytics"));
    }

    #[test]
    fn test_registry_has_box() {
        let mut analytics = Hive::new();
        analytics.boxes.insert("sensors".to_string(), Box::new());

        let mut registry = Registry::new();
        registry.hives.insert("analytics".to_string(), analytics);

        assert!(registry.has_box("analytics", "sensors"));
        assert!(!registry.has_box("analytics", "nonexistent"));
        assert!(!registry.has_box("nonexistent", "sensors"));
    }

    #[test]
    fn test_registry_has_frame() {
        let registry = registry_with_temperature_frame(json!({"fields": []}));

        assert!(registry.has_frame("analytics", "sensors", "temperature"));
        assert!(!registry.has_frame("analytics", "sensors", "nonexistent"));
    }

    #[test]
    fn test_registry_get_frame() {
        let schema = json!({"fields": [{"name": "value", "type": "int"}]});
        let registry = registry_with_temperature_frame(schema.clone());

        let found = registry.get_frame("analytics", "sensors", "temperature");
        assert!(found.is_some());
        assert_eq!(found.unwrap().schema, schema);
    }

    // --- Constructors of the nested types ---

    #[test]
    fn test_hive_new() {
        let hive = Hive::new();
        assert!(hive.boxes.is_empty());
        assert!(hive.properties.is_empty());
    }

    #[test]
    fn test_box_new() {
        let empty_box = Box::new();
        assert!(empty_box.frames.is_empty());
        assert!(empty_box.properties.is_empty());
    }

    #[test]
    fn test_frame_new() {
        let schema = json!({"fields": []});
        let frame = Frame::new(schema.clone());
        assert_eq!(frame.schema, schema);
        assert!(frame.partition_by.is_empty());
        assert_eq!(frame.max_partitions, 10_000);
    }

    #[test]
    fn test_frame_with_partitioning() {
        let columns = vec!["region".to_string(), "date".to_string()];
        let frame = Frame::with_partitioning(json!({"fields": []}), columns);
        assert_eq!(frame.partition_by.len(), 2);
        assert_eq!(frame.partition_by[0], "region");
        assert_eq!(frame.partition_by[1], "date");
    }

    // --- JSON round-trips ---

    #[test]
    fn test_registry_serialization() {
        let original = Registry::new();
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Registry = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn test_hive_serialization() {
        let original = Hive::new();
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Hive = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn test_frame_serialization() {
        let schema = json!({"fields": [{"name": "temp", "type": "float"}]});
        let original = Frame::with_partitioning(schema, vec!["region".to_string()]);
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Frame = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, original);
    }
}