1use std::collections::{BTreeMap, HashMap};
30
31use thiserror::Error;
32
33use crate::registry::{DocId, FieldId};
34
35#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
37pub enum IndexType {
38 Hash,
40 Sorted,
42 Array,
44 Unique,
46}
47
48#[derive(Debug, Error, PartialEq, Eq)]
50pub enum IndexError {
51 #[error("index already exists for field {0}")]
53 AlreadyExists(FieldId),
54 #[error("no index found for field {0}")]
56 NotFound(FieldId),
57 #[error("unique index violation: hash {hash} already maps to doc_id {existing_doc_id}")]
59 UniqueViolation {
60 hash: u32,
62 existing_doc_id: DocId,
64 },
65}
66
67#[derive(Debug, Default)]
69pub struct IndexConfig {
70 entries: HashMap<FieldId, IndexType>,
71}
72
73impl IndexConfig {
74 #[must_use]
76 pub fn new() -> Self {
77 Self::default()
78 }
79
80 pub fn add(&mut self, field_id: FieldId, index_type: IndexType) -> Result<(), IndexError> {
82 if self.entries.contains_key(&field_id) {
83 return Err(IndexError::AlreadyExists(field_id));
84 }
85 self.entries.insert(field_id, index_type);
86 Ok(())
87 }
88
89 pub fn remove(&mut self, field_id: FieldId) -> Result<IndexType, IndexError> {
91 self.entries
92 .remove(&field_id)
93 .ok_or(IndexError::NotFound(field_id))
94 }
95
96 #[must_use]
98 pub fn lookup(&self, field_id: FieldId) -> Option<IndexType> {
99 self.entries.get(&field_id).copied()
100 }
101
102 #[must_use]
104 pub fn entries(&self) -> &HashMap<FieldId, IndexType> {
105 &self.entries
106 }
107}
108
109#[must_use]
111pub fn hash32(data: &[u8]) -> u32 {
112 let mut hash: u32 = 0x811c_9dc5;
113 for &byte in data {
114 hash ^= u32::from(byte);
115 hash = hash.wrapping_mul(0x0100_0193);
116 }
117 hash
118}
119
120#[derive(Debug, Default)]
122pub struct HashIndex {
123 buckets: HashMap<u32, Vec<DocId>>,
124}
125
126impl HashIndex {
127 #[must_use]
129 pub fn new() -> Self {
130 Self::default()
131 }
132
133 pub fn add(&mut self, hash: u32, doc_id: DocId) {
135 let bucket = self.buckets.entry(hash).or_default();
136 match bucket.binary_search(&doc_id) {
137 Ok(_) => {}
138 Err(pos) => bucket.insert(pos, doc_id),
139 }
140 }
141
142 pub fn remove(&mut self, hash: u32, doc_id: DocId) {
144 if let Some(bucket) = self.buckets.get_mut(&hash) {
145 if let Ok(pos) = bucket.binary_search(&doc_id) {
146 bucket.remove(pos);
147 }
148 if bucket.is_empty() {
149 self.buckets.remove(&hash);
150 }
151 }
152 }
153
154 #[must_use]
156 pub fn lookup(&self, hash: u32) -> &[DocId] {
157 self.buckets.get(&hash).map_or(&[], Vec::as_slice)
158 }
159
160 pub fn clear(&mut self) {
162 self.buckets.clear();
163 }
164}
165
166#[derive(Debug, Clone, Copy)]
171pub struct OrderedF64(f64);
172
173impl OrderedF64 {
174 #[must_use]
176 pub fn new(value: f64) -> Self {
177 Self(value)
178 }
179
180 #[must_use]
182 pub fn value(self) -> f64 {
183 self.0
184 }
185
186 fn to_sort_key(self) -> u64 {
187 let bits = self.0.to_bits();
188 if self.0.is_nan() {
189 return u64::MAX;
190 }
191 if bits >> 63 == 1 {
192 !bits
193 } else {
194 bits | (1 << 63)
195 }
196 }
197}
198
199impl PartialEq for OrderedF64 {
200 fn eq(&self, other: &Self) -> bool {
201 self.to_sort_key() == other.to_sort_key()
202 }
203}
204
205impl Eq for OrderedF64 {}
206
207impl PartialOrd for OrderedF64 {
208 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
209 Some(self.cmp(other))
210 }
211}
212
213impl Ord for OrderedF64 {
214 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
215 self.to_sort_key().cmp(&other.to_sort_key())
216 }
217}
218
219#[derive(Debug, Default)]
221pub struct SortedIndex {
222 tree: BTreeMap<OrderedF64, Vec<DocId>>,
223}
224
225impl SortedIndex {
226 #[must_use]
228 pub fn new() -> Self {
229 Self::default()
230 }
231
232 pub fn add(&mut self, key: f64, doc_id: DocId) {
234 let bucket = self.tree.entry(OrderedF64::new(key)).or_default();
235 match bucket.binary_search(&doc_id) {
236 Ok(_) => {}
237 Err(pos) => bucket.insert(pos, doc_id),
238 }
239 }
240
241 pub fn remove(&mut self, key: f64, doc_id: DocId) {
243 let ordered = OrderedF64::new(key);
244 if let Some(bucket) = self.tree.get_mut(&ordered) {
245 if let Ok(pos) = bucket.binary_search(&doc_id) {
246 bucket.remove(pos);
247 }
248 if bucket.is_empty() {
249 self.tree.remove(&ordered);
250 }
251 }
252 }
253
254 #[must_use]
256 pub fn range_query(&self, min: f64, max: f64) -> Vec<DocId> {
257 let lo = OrderedF64::new(min);
258 let hi = OrderedF64::new(max);
259 let mut result = Vec::new();
260 for (_key, bucket) in self.tree.range(lo..=hi) {
261 result.extend_from_slice(bucket);
262 }
263 result.sort_unstable();
264 result.dedup();
265 result
266 }
267
268 pub fn clear(&mut self) {
270 self.tree.clear();
271 }
272}
273
274#[derive(Debug, Default)]
279pub struct UniqueIndex {
280 entries: HashMap<u32, Vec<DocId>>,
281}
282
283impl UniqueIndex {
284 #[must_use]
286 pub fn new() -> Self {
287 Self::default()
288 }
289
290 pub fn add(&mut self, hash: u32, doc_id: DocId) {
292 let bucket = self.entries.entry(hash).or_default();
293 match bucket.binary_search(&doc_id) {
294 Ok(_) => {}
295 Err(pos) => bucket.insert(pos, doc_id),
296 }
297 }
298
299 pub fn remove(&mut self, hash: u32, doc_id: DocId) {
301 if let Some(bucket) = self.entries.get_mut(&hash) {
302 if let Ok(pos) = bucket.binary_search(&doc_id) {
303 bucket.remove(pos);
304 }
305 if bucket.is_empty() {
306 self.entries.remove(&hash);
307 }
308 }
309 }
310
311 #[must_use]
313 pub fn lookup(&self, hash: u32) -> &[DocId] {
314 self.entries.get(&hash).map_or(&[], Vec::as_slice)
315 }
316
317 pub fn clear(&mut self) {
319 self.entries.clear();
320 }
321}
322
323#[derive(Debug, Default)]
325pub struct CollectionIndexes {
326 hash_indexes: HashMap<FieldId, HashIndex>,
327 sorted_indexes: HashMap<FieldId, SortedIndex>,
328 array_indexes: HashMap<FieldId, HashIndex>,
329 unique_indexes: HashMap<FieldId, UniqueIndex>,
330}
331
332impl CollectionIndexes {
333 #[must_use]
335 pub fn new() -> Self {
336 Self::default()
337 }
338
339 pub fn get_or_create_hash(&mut self, field_id: FieldId) -> &mut HashIndex {
341 self.hash_indexes.entry(field_id).or_default()
342 }
343
344 pub fn get_or_create_sorted(&mut self, field_id: FieldId) -> &mut SortedIndex {
346 self.sorted_indexes.entry(field_id).or_default()
347 }
348
349 pub fn get_or_create_array(&mut self, field_id: FieldId) -> &mut HashIndex {
351 self.array_indexes.entry(field_id).or_default()
352 }
353
354 pub fn get_or_create_unique(&mut self, field_id: FieldId) -> &mut UniqueIndex {
356 self.unique_indexes.entry(field_id).or_default()
357 }
358
359 #[must_use]
361 pub fn hash(&self, field_id: FieldId) -> Option<&HashIndex> {
362 self.hash_indexes.get(&field_id)
363 }
364
365 #[must_use]
367 pub fn sorted(&self, field_id: FieldId) -> Option<&SortedIndex> {
368 self.sorted_indexes.get(&field_id)
369 }
370
371 #[must_use]
373 pub fn array(&self, field_id: FieldId) -> Option<&HashIndex> {
374 self.array_indexes.get(&field_id)
375 }
376
377 #[must_use]
379 pub fn unique(&self, field_id: FieldId) -> Option<&UniqueIndex> {
380 self.unique_indexes.get(&field_id)
381 }
382
383 pub fn remove_field(&mut self, field_id: FieldId) {
385 self.hash_indexes.remove(&field_id);
386 self.sorted_indexes.remove(&field_id);
387 self.array_indexes.remove(&field_id);
388 self.unique_indexes.remove(&field_id);
389 }
390}
391
392#[must_use]
394pub fn intersect_sorted(a: &[DocId], b: &[DocId]) -> Vec<DocId> {
395 let mut result = Vec::new();
396 let (mut i, mut j) = (0, 0);
397 while i < a.len() && j < b.len() {
398 match a[i].cmp(&b[j]) {
399 std::cmp::Ordering::Less => i += 1,
400 std::cmp::Ordering::Greater => j += 1,
401 std::cmp::Ordering::Equal => {
402 result.push(a[i]);
403 i += 1;
404 j += 1;
405 }
406 }
407 }
408 result
409}
410
411#[must_use]
413pub fn union_sorted(a: &[DocId], b: &[DocId]) -> Vec<DocId> {
414 let mut result = Vec::with_capacity(a.len() + b.len());
415 let (mut i, mut j) = (0, 0);
416 while i < a.len() && j < b.len() {
417 match a[i].cmp(&b[j]) {
418 std::cmp::Ordering::Less => {
419 result.push(a[i]);
420 i += 1;
421 }
422 std::cmp::Ordering::Greater => {
423 result.push(b[j]);
424 j += 1;
425 }
426 std::cmp::Ordering::Equal => {
427 result.push(a[i]);
428 i += 1;
429 j += 1;
430 }
431 }
432 }
433 result.extend_from_slice(&a[i..]);
434 result.extend_from_slice(&b[j..]);
435 result
436}
437
438#[cfg(test)]
439mod tests {
440 use super::*;
441
442 #[test]
443 fn hash_index_add_remove_lookup() {
444 let mut idx = HashIndex::new();
445 idx.add(100, 1);
446 idx.add(100, 3);
447 idx.add(100, 2);
448 assert_eq!(idx.lookup(100), &[1, 2, 3]);
449
450 idx.remove(100, 2);
451 assert_eq!(idx.lookup(100), &[1, 3]);
452
453 idx.remove(100, 1);
454 idx.remove(100, 3);
455 assert_eq!(idx.lookup(100), &[] as &[DocId]);
456 }
457
458 #[test]
459 fn hash_index_duplicate_add_is_idempotent() {
460 let mut idx = HashIndex::new();
461 idx.add(42, 7);
462 idx.add(42, 7);
463 idx.add(42, 7);
464 assert_eq!(idx.lookup(42), &[7]);
465 }
466
467 #[test]
468 fn hash_index_remove_nonexistent_is_noop() {
469 let mut idx = HashIndex::new();
470 idx.remove(999, 1);
471 idx.add(10, 5);
472 idx.remove(10, 99);
473 assert_eq!(idx.lookup(10), &[5]);
474 }
475
476 #[test]
477 fn sorted_index_add_remove_range_query() {
478 let mut idx = SortedIndex::new();
479 idx.add(1.0, 10);
480 idx.add(3.0, 30);
481 idx.add(2.0, 20);
482 idx.add(2.0, 25);
483
484 let result = idx.range_query(1.5, 3.0);
485 assert_eq!(result, vec![20, 25, 30]);
486
487 idx.remove(2.0, 20);
488 let result = idx.range_query(1.5, 3.0);
489 assert_eq!(result, vec![25, 30]);
490 }
491
492 #[test]
493 fn ordered_f64_ordering_negative_zero_positive() {
494 let neg = OrderedF64::new(-5.0);
495 let zero = OrderedF64::new(0.0);
496 let pos = OrderedF64::new(10.0);
497 let nan = OrderedF64::new(f64::NAN);
498
499 assert!(neg < zero);
500 assert!(zero < pos);
501 assert!(pos < nan);
502 assert!(neg < pos);
503 assert!(neg < nan);
504 }
505
506 #[test]
507 fn unique_index_add_lookup() {
508 let mut idx = UniqueIndex::new();
509 idx.add(42, 1);
510 assert_eq!(idx.lookup(42), &[1]);
511 }
512
513 #[test]
514 fn unique_index_allows_hash_collisions() {
515 let mut idx = UniqueIndex::new();
516 idx.add(42, 1);
517 idx.add(42, 2);
518 assert_eq!(idx.lookup(42), &[1, 2]);
519 }
520
521 #[test]
522 fn unique_index_same_doc_id_same_hash_is_ok() {
523 let mut idx = UniqueIndex::new();
524 idx.add(42, 1);
525 idx.add(42, 1);
526 assert_eq!(idx.lookup(42), &[1]);
527 }
528
529 #[test]
530 fn unique_index_remove_doc_id_only() {
531 let mut idx = UniqueIndex::new();
532 idx.add(42, 1);
533 idx.add(42, 2);
534 idx.remove(42, 1);
535 assert_eq!(idx.lookup(42), &[2]);
536 idx.remove(42, 2);
537 assert_eq!(idx.lookup(42), &[] as &[DocId]);
538 }
539
540 #[test]
541 fn index_config_add_remove_entries() {
542 let mut config = IndexConfig::new();
543 config.add(0, IndexType::Hash).expect("add should succeed");
544 config
545 .add(1, IndexType::Sorted)
546 .expect("add should succeed");
547
548 assert_eq!(config.lookup(0), Some(IndexType::Hash));
549 assert_eq!(config.lookup(1), Some(IndexType::Sorted));
550 assert_eq!(config.entries().len(), 2);
551
552 let removed = config.remove(0).expect("remove should succeed");
553 assert_eq!(removed, IndexType::Hash);
554 assert_eq!(config.lookup(0), None);
555 }
556
557 #[test]
558 fn index_config_duplicate_add_rejected() {
559 let mut config = IndexConfig::new();
560 config.add(0, IndexType::Hash).expect("add should succeed");
561 let err = config
562 .add(0, IndexType::Sorted)
563 .expect_err("duplicate add must fail");
564 assert!(matches!(err, IndexError::AlreadyExists(0)));
565 }
566
567 #[test]
568 fn index_config_remove_nonexistent_rejected() {
569 let mut config = IndexConfig::new();
570 let err = config
571 .remove(99)
572 .expect_err("remove non-existent must fail");
573 assert!(matches!(err, IndexError::NotFound(99)));
574 }
575
576 #[test]
577 fn intersect_sorted_disjoint() {
578 let result = intersect_sorted(&[1, 3, 5], &[2, 4, 6]);
579 assert!(result.is_empty());
580 }
581
582 #[test]
583 fn intersect_sorted_overlapping() {
584 let result = intersect_sorted(&[1, 2, 3, 5], &[2, 3, 4, 5]);
585 assert_eq!(result, vec![2, 3, 5]);
586 }
587
588 #[test]
589 fn intersect_sorted_empty_input() {
590 assert!(intersect_sorted(&[], &[1, 2, 3]).is_empty());
591 assert!(intersect_sorted(&[1, 2, 3], &[]).is_empty());
592 assert!(intersect_sorted(&[], &[]).is_empty());
593 }
594
595 #[test]
596 fn union_sorted_disjoint() {
597 let result = union_sorted(&[1, 3, 5], &[2, 4, 6]);
598 assert_eq!(result, vec![1, 2, 3, 4, 5, 6]);
599 }
600
601 #[test]
602 fn union_sorted_overlapping() {
603 let result = union_sorted(&[1, 2, 3], &[2, 3, 4]);
604 assert_eq!(result, vec![1, 2, 3, 4]);
605 }
606
607 #[test]
608 fn union_sorted_empty_input() {
609 assert_eq!(union_sorted(&[], &[1, 2]), vec![1, 2]);
610 assert_eq!(union_sorted(&[1, 2], &[]), vec![1, 2]);
611 assert!(union_sorted(&[], &[]).is_empty());
612 }
613}