1use std::collections::HashMap;
28
29use thiserror::Error;
30
/// Numeric id assigned to a collection name by [`IdRegistry`].
pub type CollectionId = u16;
/// Numeric id assigned to a field path inside one [`RegistrySegment`].
pub type FieldId = u16;
/// Internal numeric id assigned to an external document id inside one [`RegistrySegment`].
pub type DocId = u32;

/// Per-collection segment reference: a `u16` paired with a byte buffer.
///
/// NOTE(review): the meaning of the two components is not visible in this file;
/// confirm against the code that calls `IdRegistry::set_segment_ref`.
pub type RegistrySegmentRef = (u16, Vec<u8>);
42
/// Errors returned by the id-allocation and lookup operations in this file.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum RegistryError {
    /// The next collection id no longer fits in [`CollectionId`].
    #[error("collection id space exhausted")]
    CollectionOverflow,
    /// The supplied collection id is not registered.
    #[error("unknown collection id {0}")]
    UnknownCollection(CollectionId),
    /// The next field id for a collection no longer fits in [`FieldId`].
    #[error("field id space exhausted for collection {collection_id}")]
    FieldOverflow {
        /// Collection whose field id space ran out.
        collection_id: CollectionId,
    },
    /// The next document id for a collection no longer fits in [`DocId`].
    #[error("document id space exhausted for collection {collection_id}")]
    DocOverflow {
        /// Collection whose document id space ran out.
        collection_id: CollectionId,
    },
}
65
/// Maps collection names to compact numeric ids and owns, per collection, a
/// [`RegistrySegment`] plus a [`RegistrySegmentRef`].
#[derive(Debug, Default)]
pub struct IdRegistry {
    /// Collection name -> assigned id.
    collections_by_name: HashMap<String, CollectionId>,
    /// Assigned id -> collection name (reverse of `collections_by_name`).
    collection_names_by_id: HashMap<CollectionId, String>,
    /// Field/document id tables for each registered collection.
    segments_by_collection: HashMap<CollectionId, RegistrySegment>,
    /// Segment reference for each registered collection.
    segment_refs: HashMap<CollectionId, RegistrySegmentRef>,
    /// Next id to hand out. Stored wider than `CollectionId` so exhaustion shows
    /// up as a failed narrowing conversion instead of a wrap-around.
    next_collection_id: u32,
}
75
76impl IdRegistry {
77 #[must_use]
79 pub fn new() -> Self {
80 Self::default()
81 }
82
83 pub fn get_or_create_collection_id(
85 &mut self,
86 name: &str,
87 ) -> Result<CollectionId, RegistryError> {
88 if let Some(id) = self.collections_by_name.get(name) {
89 return Ok(*id);
90 }
91
92 let id = CollectionId::try_from(self.next_collection_id)
93 .map_err(|_| RegistryError::CollectionOverflow)?;
94 self.next_collection_id += 1;
95
96 self.collections_by_name.insert(name.to_owned(), id);
97 self.collection_names_by_id.insert(id, name.to_owned());
98 self.segments_by_collection
99 .insert(id, RegistrySegment::new());
100 self.segment_refs.insert(id, (0, Vec::new()));
101
102 Ok(id)
103 }
104
105 #[must_use]
107 pub fn collection_id(&self, name: &str) -> Option<CollectionId> {
108 self.collections_by_name.get(name).copied()
109 }
110
111 #[must_use]
113 pub fn collection_name(&self, collection_id: CollectionId) -> Option<&str> {
114 self.collection_names_by_id
115 .get(&collection_id)
116 .map(String::as_str)
117 }
118
119 #[must_use]
121 pub fn segment_ref(&self, collection_id: CollectionId) -> Option<&RegistrySegmentRef> {
122 self.segment_refs.get(&collection_id)
123 }
124
125 pub fn set_segment_ref(
127 &mut self,
128 collection_id: CollectionId,
129 segment_ref: RegistrySegmentRef,
130 ) -> Result<(), RegistryError> {
131 if !self.segments_by_collection.contains_key(&collection_id) {
132 return Err(RegistryError::UnknownCollection(collection_id));
133 }
134 self.segment_refs.insert(collection_id, segment_ref);
135 Ok(())
136 }
137
138 #[must_use]
140 pub fn segment(&self, collection_id: CollectionId) -> Option<&RegistrySegment> {
141 self.segments_by_collection.get(&collection_id)
142 }
143
144 #[must_use]
146 pub fn segment_mut(&mut self, collection_id: CollectionId) -> Option<&mut RegistrySegment> {
147 self.segments_by_collection.get_mut(&collection_id)
148 }
149
150 pub fn get_or_create_field_id(
152 &mut self,
153 collection_id: CollectionId,
154 path: &str,
155 ) -> Result<FieldId, RegistryError> {
156 let segment = self
157 .segments_by_collection
158 .get_mut(&collection_id)
159 .ok_or(RegistryError::UnknownCollection(collection_id))?;
160 segment.get_or_create_field_id(collection_id, path)
161 }
162
163 pub fn get_or_create_doc_internal_id(
165 &mut self,
166 collection_id: CollectionId,
167 external_doc_id: &str,
168 ) -> Result<DocId, RegistryError> {
169 let segment = self
170 .segments_by_collection
171 .get_mut(&collection_id)
172 .ok_or(RegistryError::UnknownCollection(collection_id))?;
173 segment.get_or_create_doc_internal_id(collection_id, external_doc_id)
174 }
175
176 #[must_use]
178 pub fn field_path(&self, collection_id: CollectionId, field_id: FieldId) -> Option<&str> {
179 self.segments_by_collection
180 .get(&collection_id)
181 .and_then(|s| s.field_path(field_id))
182 }
183
184 #[must_use]
186 pub fn doc_external_id(&self, collection_id: CollectionId, doc_id: DocId) -> Option<&str> {
187 self.segments_by_collection
188 .get(&collection_id)
189 .and_then(|s| s.doc_external_id(doc_id))
190 }
191
192 #[must_use]
194 pub fn collection_count(&self) -> usize {
195 self.collections_by_name.len()
196 }
197
198 pub fn remove_collection(&mut self, name: &str) -> Option<CollectionId> {
202 let collection_id = self.collections_by_name.remove(name)?;
203 self.collection_names_by_id.remove(&collection_id);
204 self.segments_by_collection.remove(&collection_id);
205 self.segment_refs.remove(&collection_id);
206 Some(collection_id)
207 }
208}
209
/// Field-path and document-id tables for a single collection.
#[derive(Debug, Default)]
pub struct RegistrySegment {
    /// Field path -> assigned field id.
    fields_by_path: HashMap<String, FieldId>,
    /// Assigned field id -> field path (reverse of `fields_by_path`).
    paths_by_field: HashMap<FieldId, String>,
    /// External document id -> assigned internal id.
    docs_by_external: HashMap<String, DocId>,
    /// Assigned internal id -> external document id (reverse of `docs_by_external`).
    external_by_doc: HashMap<DocId, String>,
    /// Next field id to hand out; wider than `FieldId` so exhaustion is detectable.
    next_field_id: u32,
    /// Next document id to hand out; wider than `DocId` so exhaustion is detectable.
    next_doc_id: u64,
}
220
221impl RegistrySegment {
222 #[must_use]
224 pub fn new() -> Self {
225 Self::default()
226 }
227
228 pub fn next_doc_id(&self) -> u64 {
230 self.next_doc_id
231 }
232
233 pub fn get_or_create_field_id(
235 &mut self,
236 collection_id: CollectionId,
237 path: &str,
238 ) -> Result<FieldId, RegistryError> {
239 if let Some(field_id) = self.fields_by_path.get(path) {
240 return Ok(*field_id);
241 }
242
243 let field_id = FieldId::try_from(self.next_field_id)
244 .map_err(|_| RegistryError::FieldOverflow { collection_id })?;
245 self.next_field_id += 1;
246
247 self.fields_by_path.insert(path.to_owned(), field_id);
248 self.paths_by_field.insert(field_id, path.to_owned());
249 Ok(field_id)
250 }
251
252 pub fn get_or_create_doc_internal_id(
254 &mut self,
255 collection_id: CollectionId,
256 external_doc_id: &str,
257 ) -> Result<DocId, RegistryError> {
258 if let Some(doc_id) = self.docs_by_external.get(external_doc_id) {
259 return Ok(*doc_id);
260 }
261
262 let doc_id = DocId::try_from(self.next_doc_id)
263 .map_err(|_| RegistryError::DocOverflow { collection_id })?;
264 self.next_doc_id += 1;
265
266 self.docs_by_external
267 .insert(external_doc_id.to_owned(), doc_id);
268 self.external_by_doc
269 .insert(doc_id, external_doc_id.to_owned());
270 Ok(doc_id)
271 }
272
273 #[must_use]
275 pub fn field_id(&self, path: &str) -> Option<FieldId> {
276 self.fields_by_path.get(path).copied()
277 }
278
279 #[must_use]
281 pub fn field_path(&self, field_id: FieldId) -> Option<&str> {
282 self.paths_by_field.get(&field_id).map(String::as_str)
283 }
284
285 #[must_use]
287 pub fn doc_internal_id(&self, external_doc_id: &str) -> Option<DocId> {
288 self.docs_by_external.get(external_doc_id).copied()
289 }
290
291 #[must_use]
293 pub fn doc_external_id(&self, doc_id: DocId) -> Option<&str> {
294 self.external_by_doc.get(&doc_id).map(String::as_str)
295 }
296
297 #[must_use]
299 pub fn field_count(&self) -> usize {
300 self.fields_by_path.len()
301 }
302
303 #[must_use]
305 pub fn doc_count(&self) -> usize {
306 self.docs_by_external.len()
307 }
308
309 #[must_use]
311 pub fn field_mappings(&self) -> Vec<(FieldId, String)> {
312 let mut mappings: Vec<(FieldId, String)> = self
313 .paths_by_field
314 .iter()
315 .map(|(field_id, path)| (*field_id, path.clone()))
316 .collect();
317 mappings.sort_by_key(|(field_id, _)| *field_id);
318 mappings
319 }
320}
321
#[cfg(test)]
mod tests {
    use super::*;

    /// Asking for the same collection name twice yields the same id; a
    /// different name yields a different id, and both resolve back to names.
    #[test]
    fn collection_ids_are_stable() {
        let mut ids = IdRegistry::new();

        let users_first = ids
            .get_or_create_collection_id("users")
            .expect("collection id should be allocated");
        let users_again = ids
            .get_or_create_collection_id("users")
            .expect("collection id should be reused");
        let orders = ids
            .get_or_create_collection_id("orders")
            .expect("collection id should be allocated");

        assert_eq!(users_first, users_again);
        assert_ne!(users_first, orders);
        assert_eq!(ids.collection_name(users_first), Some("users"));
        assert_eq!(ids.collection_name(orders), Some("orders"));
    }

    /// Field paths and external document ids each map to one stable id per
    /// collection and resolve back to the original string.
    #[test]
    fn field_and_doc_ids_are_stable() {
        let mut ids = IdRegistry::new();
        let users = ids
            .get_or_create_collection_id("users")
            .expect("collection should be created");

        // Field ids.
        let city_first = ids
            .get_or_create_field_id(users, "address.city")
            .expect("field id should be created");
        let city_again = ids
            .get_or_create_field_id(users, "address.city")
            .expect("field id should be reused");
        let zip = ids
            .get_or_create_field_id(users, "address.zip")
            .expect("field id should be created");

        assert_eq!(city_first, city_again);
        assert_ne!(city_first, zip);
        assert_eq!(ids.field_path(users, city_first), Some("address.city"));

        // Document ids.
        let doc_one_first = ids
            .get_or_create_doc_internal_id(users, "doc:1")
            .expect("doc id should be created");
        let doc_one_again = ids
            .get_or_create_doc_internal_id(users, "doc:1")
            .expect("doc id should be reused");
        let doc_two = ids
            .get_or_create_doc_internal_id(users, "doc:2")
            .expect("doc id should be created");

        assert_eq!(doc_one_first, doc_one_again);
        assert_ne!(doc_one_first, doc_two);
        assert_eq!(ids.doc_external_id(users, doc_one_first), Some("doc:1"));
    }

    /// Allocating a field id for an unregistered collection is an error.
    #[test]
    fn unknown_collection_returns_error() {
        let mut ids = IdRegistry::new();

        let failure = ids
            .get_or_create_field_id(42, "city")
            .expect_err("unknown collection should return error");

        assert_eq!(failure, RegistryError::UnknownCollection(42));
    }

    /// Removing a collection returns its id and makes the name unknown again.
    #[test]
    fn collection_can_be_removed() {
        let mut ids = IdRegistry::new();
        let users = ids
            .get_or_create_collection_id("users")
            .expect("collection should be created");
        assert_eq!(ids.collection_count(), 1);

        let removed = ids.remove_collection("users");

        assert_eq!(removed, Some(users));
        assert_eq!(ids.collection_count(), 0);
        assert_eq!(ids.collection_id("users"), None);
    }

    /// `field_mappings` is ordered by field id, not by path.
    #[test]
    fn field_mappings_are_sorted_by_field_id() {
        let mut ids = IdRegistry::new();
        let users = ids
            .get_or_create_collection_id("users")
            .expect("collection should be created");
        // Paths are inserted in reverse alphabetical order on purpose.
        for path in ["zeta", "alpha"] {
            ids.get_or_create_field_id(users, path)
                .expect("field should be created");
        }

        let mappings = ids
            .segment(users)
            .expect("segment should exist")
            .field_mappings();
        let field_ids: Vec<FieldId> = mappings.iter().map(|(field_id, _)| *field_id).collect();

        assert_eq!(field_ids, vec![0, 1]);
    }
}