// kora_doc/registry.rs

//! Integer-keyed ID registry for collections, fields, and documents.
//!
//! The document layer works exclusively with compact numeric identifiers
//! internally -- [`CollectionId`] (`u16`), [`FieldId`] (`u16`), and
//! [`DocId`] (`u32`). The [`IdRegistry`] is the single source of truth for
//! mapping between human-readable names/paths and these numeric IDs.
//!
//! ## Two-Level Structure
//!
//! - **Global directory** -- maps collection names to `CollectionId` values
//!   and owns a [`RegistrySegment`] per collection.
//! - **Per-collection segment** ([`RegistrySegment`]) -- manages bidirectional
//!   mappings between dotted field paths and `FieldId`, and between external
//!   document ID strings and internal `DocId` values.
//!
//! All IDs are allocated monotonically and never reused, even after a
//! collection has been removed. Lookups are O(1) hash-map operations;
//! creation is idempotent (returning the existing ID if the name/path was
//! already registered).
//!
//! ## Segment References
//!
//! Each collection also carries a [`RegistrySegmentRef`] tuple `(shard, key)`
//! that records where the segment's persistent representation lives in the
//! shard-affinity storage layer. This pointer is opaque to the registry
//! itself and managed by the engine; a new collection starts with the
//! placeholder `(0, [])` until [`IdRegistry::set_segment_ref`] is called.
26
27use std::collections::HashMap;
28
29use thiserror::Error;
30
/// Numeric handle for a collection (16 bits wide).
pub type CollectionId = u16;
/// Numeric handle for a field path inside one collection (16 bits wide).
pub type FieldId = u16;
/// Numeric handle for a document inside one collection (32 bits wide).
pub type DocId = u32;

/// Location of the registry segment owned by a collection.
///
/// The tuple is laid out as `(owner_shard, segment_key)`.
pub type RegistrySegmentRef = (u16, Vec<u8>);
42
43/// Errors returned by registry operations.
44#[derive(Debug, Error, PartialEq, Eq)]
45pub enum RegistryError {
46    /// The registry cannot allocate a new collection ID.
47    #[error("collection id space exhausted")]
48    CollectionOverflow,
49    /// The collection is unknown.
50    #[error("unknown collection id {0}")]
51    UnknownCollection(CollectionId),
52    /// The collection cannot allocate a new field ID.
53    #[error("field id space exhausted for collection {collection_id}")]
54    FieldOverflow {
55        /// Collection ID that exhausted field IDs.
56        collection_id: CollectionId,
57    },
58    /// The collection cannot allocate a new internal document ID.
59    #[error("document id space exhausted for collection {collection_id}")]
60    DocOverflow {
61        /// Collection ID that exhausted document IDs.
62        collection_id: CollectionId,
63    },
64}
65
66/// Global registry directory plus collection-owned segments.
67#[derive(Debug, Default)]
68pub struct IdRegistry {
69    collections_by_name: HashMap<String, CollectionId>,
70    collection_names_by_id: HashMap<CollectionId, String>,
71    segments_by_collection: HashMap<CollectionId, RegistrySegment>,
72    segment_refs: HashMap<CollectionId, RegistrySegmentRef>,
73    next_collection_id: u32,
74}
75
76impl IdRegistry {
77    /// Create an empty registry.
78    #[must_use]
79    pub fn new() -> Self {
80        Self::default()
81    }
82
83    /// Return a collection ID for `name`, creating one when needed.
84    pub fn get_or_create_collection_id(
85        &mut self,
86        name: &str,
87    ) -> Result<CollectionId, RegistryError> {
88        if let Some(id) = self.collections_by_name.get(name) {
89            return Ok(*id);
90        }
91
92        let id = CollectionId::try_from(self.next_collection_id)
93            .map_err(|_| RegistryError::CollectionOverflow)?;
94        self.next_collection_id += 1;
95
96        self.collections_by_name.insert(name.to_owned(), id);
97        self.collection_names_by_id.insert(id, name.to_owned());
98        self.segments_by_collection
99            .insert(id, RegistrySegment::new());
100        self.segment_refs.insert(id, (0, Vec::new()));
101
102        Ok(id)
103    }
104
105    /// Return the collection ID for `name` if it exists.
106    #[must_use]
107    pub fn collection_id(&self, name: &str) -> Option<CollectionId> {
108        self.collections_by_name.get(name).copied()
109    }
110
111    /// Return the collection name for `collection_id` if it exists.
112    #[must_use]
113    pub fn collection_name(&self, collection_id: CollectionId) -> Option<&str> {
114        self.collection_names_by_id
115            .get(&collection_id)
116            .map(String::as_str)
117    }
118
119    /// Return the shard/key pointer for a collection segment.
120    #[must_use]
121    pub fn segment_ref(&self, collection_id: CollectionId) -> Option<&RegistrySegmentRef> {
122        self.segment_refs.get(&collection_id)
123    }
124
125    /// Update the shard/key pointer for a collection segment.
126    pub fn set_segment_ref(
127        &mut self,
128        collection_id: CollectionId,
129        segment_ref: RegistrySegmentRef,
130    ) -> Result<(), RegistryError> {
131        if !self.segments_by_collection.contains_key(&collection_id) {
132            return Err(RegistryError::UnknownCollection(collection_id));
133        }
134        self.segment_refs.insert(collection_id, segment_ref);
135        Ok(())
136    }
137
138    /// Return an immutable segment for `collection_id`.
139    #[must_use]
140    pub fn segment(&self, collection_id: CollectionId) -> Option<&RegistrySegment> {
141        self.segments_by_collection.get(&collection_id)
142    }
143
144    /// Return a mutable segment for `collection_id`.
145    #[must_use]
146    pub fn segment_mut(&mut self, collection_id: CollectionId) -> Option<&mut RegistrySegment> {
147        self.segments_by_collection.get_mut(&collection_id)
148    }
149
150    /// Return or create a field ID in a collection segment.
151    pub fn get_or_create_field_id(
152        &mut self,
153        collection_id: CollectionId,
154        path: &str,
155    ) -> Result<FieldId, RegistryError> {
156        let segment = self
157            .segments_by_collection
158            .get_mut(&collection_id)
159            .ok_or(RegistryError::UnknownCollection(collection_id))?;
160        segment.get_or_create_field_id(collection_id, path)
161    }
162
163    /// Return or create an internal document ID in a collection segment.
164    pub fn get_or_create_doc_internal_id(
165        &mut self,
166        collection_id: CollectionId,
167        external_doc_id: &str,
168    ) -> Result<DocId, RegistryError> {
169        let segment = self
170            .segments_by_collection
171            .get_mut(&collection_id)
172            .ok_or(RegistryError::UnknownCollection(collection_id))?;
173        segment.get_or_create_doc_internal_id(collection_id, external_doc_id)
174    }
175
176    /// Resolve field path by field ID.
177    #[must_use]
178    pub fn field_path(&self, collection_id: CollectionId, field_id: FieldId) -> Option<&str> {
179        self.segments_by_collection
180            .get(&collection_id)
181            .and_then(|s| s.field_path(field_id))
182    }
183
184    /// Resolve external document ID by internal document ID.
185    #[must_use]
186    pub fn doc_external_id(&self, collection_id: CollectionId, doc_id: DocId) -> Option<&str> {
187        self.segments_by_collection
188            .get(&collection_id)
189            .and_then(|s| s.doc_external_id(doc_id))
190    }
191
192    /// Number of registered collections.
193    #[must_use]
194    pub fn collection_count(&self) -> usize {
195        self.collections_by_name.len()
196    }
197
198    /// Remove a collection and all associated registry state.
199    ///
200    /// Returns the removed collection ID when present.
201    pub fn remove_collection(&mut self, name: &str) -> Option<CollectionId> {
202        let collection_id = self.collections_by_name.remove(name)?;
203        self.collection_names_by_id.remove(&collection_id);
204        self.segments_by_collection.remove(&collection_id);
205        self.segment_refs.remove(&collection_id);
206        Some(collection_id)
207    }
208}
209
210/// Per-collection registry segment.
211#[derive(Debug, Default)]
212pub struct RegistrySegment {
213    fields_by_path: HashMap<String, FieldId>,
214    paths_by_field: HashMap<FieldId, String>,
215    docs_by_external: HashMap<String, DocId>,
216    external_by_doc: HashMap<DocId, String>,
217    next_field_id: u32,
218    next_doc_id: u64,
219}
220
221impl RegistrySegment {
222    /// Create an empty segment.
223    #[must_use]
224    pub fn new() -> Self {
225        Self::default()
226    }
227
228    /// Return the next document ID that will be assigned (without consuming it).
229    pub fn next_doc_id(&self) -> u64 {
230        self.next_doc_id
231    }
232
233    /// Return or create a field ID for `path`.
234    pub fn get_or_create_field_id(
235        &mut self,
236        collection_id: CollectionId,
237        path: &str,
238    ) -> Result<FieldId, RegistryError> {
239        if let Some(field_id) = self.fields_by_path.get(path) {
240            return Ok(*field_id);
241        }
242
243        let field_id = FieldId::try_from(self.next_field_id)
244            .map_err(|_| RegistryError::FieldOverflow { collection_id })?;
245        self.next_field_id += 1;
246
247        self.fields_by_path.insert(path.to_owned(), field_id);
248        self.paths_by_field.insert(field_id, path.to_owned());
249        Ok(field_id)
250    }
251
252    /// Return or create an internal document ID for `external_doc_id`.
253    pub fn get_or_create_doc_internal_id(
254        &mut self,
255        collection_id: CollectionId,
256        external_doc_id: &str,
257    ) -> Result<DocId, RegistryError> {
258        if let Some(doc_id) = self.docs_by_external.get(external_doc_id) {
259            return Ok(*doc_id);
260        }
261
262        let doc_id = DocId::try_from(self.next_doc_id)
263            .map_err(|_| RegistryError::DocOverflow { collection_id })?;
264        self.next_doc_id += 1;
265
266        self.docs_by_external
267            .insert(external_doc_id.to_owned(), doc_id);
268        self.external_by_doc
269            .insert(doc_id, external_doc_id.to_owned());
270        Ok(doc_id)
271    }
272
273    /// Resolve field ID by field path.
274    #[must_use]
275    pub fn field_id(&self, path: &str) -> Option<FieldId> {
276        self.fields_by_path.get(path).copied()
277    }
278
279    /// Resolve field path by field ID.
280    #[must_use]
281    pub fn field_path(&self, field_id: FieldId) -> Option<&str> {
282        self.paths_by_field.get(&field_id).map(String::as_str)
283    }
284
285    /// Resolve internal document ID by external document ID.
286    #[must_use]
287    pub fn doc_internal_id(&self, external_doc_id: &str) -> Option<DocId> {
288        self.docs_by_external.get(external_doc_id).copied()
289    }
290
291    /// Resolve external document ID by internal document ID.
292    #[must_use]
293    pub fn doc_external_id(&self, doc_id: DocId) -> Option<&str> {
294        self.external_by_doc.get(&doc_id).map(String::as_str)
295    }
296
297    /// Number of field IDs in this segment.
298    #[must_use]
299    pub fn field_count(&self) -> usize {
300        self.fields_by_path.len()
301    }
302
303    /// Number of document IDs in this segment.
304    #[must_use]
305    pub fn doc_count(&self) -> usize {
306        self.docs_by_external.len()
307    }
308
309    /// Return all field mappings as `(field_id, field_path)` sorted by field ID.
310    #[must_use]
311    pub fn field_mappings(&self) -> Vec<(FieldId, String)> {
312        let mut mappings: Vec<(FieldId, String)> = self
313            .paths_by_field
314            .iter()
315            .map(|(field_id, path)| (*field_id, path.clone()))
316            .collect();
317        mappings.sort_by_key(|(field_id, _)| *field_id);
318        mappings
319    }
320}
321
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a registry that already holds a `users` collection.
    fn registry_with_users() -> (IdRegistry, CollectionId) {
        let mut registry = IdRegistry::new();
        let users = registry
            .get_or_create_collection_id("users")
            .expect("collection should be created");
        (registry, users)
    }

    #[test]
    fn collection_ids_are_stable() {
        let mut registry = IdRegistry::new();
        let users_first = registry
            .get_or_create_collection_id("users")
            .expect("collection id should be allocated");
        let users_second = registry
            .get_or_create_collection_id("users")
            .expect("collection id should be reused");
        let orders = registry
            .get_or_create_collection_id("orders")
            .expect("collection id should be allocated");

        // Same name maps to the same ID; a different name gets a different one.
        assert_eq!(users_first, users_second);
        assert_ne!(users_first, orders);
        assert_eq!(registry.collection_name(users_first), Some("users"));
        assert_eq!(registry.collection_name(orders), Some("orders"));
    }

    #[test]
    fn field_and_doc_ids_are_stable() {
        let (mut registry, users) = registry_with_users();

        // Field IDs: repeated paths reuse the ID, new paths get a new one.
        let city_first = registry
            .get_or_create_field_id(users, "address.city")
            .expect("field id should be created");
        let city_second = registry
            .get_or_create_field_id(users, "address.city")
            .expect("field id should be reused");
        let zip = registry
            .get_or_create_field_id(users, "address.zip")
            .expect("field id should be created");

        assert_eq!(city_first, city_second);
        assert_ne!(city_first, zip);
        assert_eq!(registry.field_path(users, city_first), Some("address.city"));

        // Document IDs follow the same rules.
        let doc_one_first = registry
            .get_or_create_doc_internal_id(users, "doc:1")
            .expect("doc id should be created");
        let doc_one_second = registry
            .get_or_create_doc_internal_id(users, "doc:1")
            .expect("doc id should be reused");
        let doc_two = registry
            .get_or_create_doc_internal_id(users, "doc:2")
            .expect("doc id should be created");

        assert_eq!(doc_one_first, doc_one_second);
        assert_ne!(doc_one_first, doc_two);
        assert_eq!(registry.doc_external_id(users, doc_one_first), Some("doc:1"));
    }

    #[test]
    fn unknown_collection_returns_error() {
        let mut registry = IdRegistry::new();
        let failure = registry
            .get_or_create_field_id(42, "city")
            .expect_err("unknown collection should return error");
        assert_eq!(failure, RegistryError::UnknownCollection(42));
    }

    #[test]
    fn collection_can_be_removed() {
        let (mut registry, users) = registry_with_users();
        assert_eq!(registry.collection_count(), 1);

        let removed = registry.remove_collection("users");
        assert_eq!(removed, Some(users));
        assert_eq!(registry.collection_count(), 0);
        assert_eq!(registry.collection_id("users"), None);
    }

    #[test]
    fn field_mappings_are_sorted_by_field_id() {
        let (mut registry, users) = registry_with_users();
        // Register in reverse alphabetical order so the result cannot be
        // sorted by path by accident.
        for path in ["zeta", "alpha"] {
            registry
                .get_or_create_field_id(users, path)
                .expect("field should be created");
        }

        let mappings = registry
            .segment(users)
            .expect("segment should exist")
            .field_mappings();

        assert_eq!(mappings.len(), 2);
        assert_eq!(mappings[0].0, 0);
        assert_eq!(mappings[1].0, 1);
    }
}