1use std::collections::BTreeMap;
4use std::hash::{Hash, Hasher};
5
6use roaring::RoaringBitmap;
7use selene_core::{DbString, DurationOrderKey, Value, duration_order_key};
8use smallvec::SmallVec;
9
10use crate::typed_index::{NotNanError, NotNanF32, NotNanF64, TypedIndexKind, TypedIndexValueError};
11
12pub type CompositeKey = SmallVec<[CompositeKeyComponent; 4]>;
14
15#[derive(Clone, Debug, Eq, PartialEq)]
17pub enum CompositeKeyComponent {
18 Bool(bool),
20 I64(i64),
22 U64(u64),
24 I128(i128),
26 U128(u128),
28 Decimal(rust_decimal::Decimal),
30 F32(NotNanF32),
32 F64(NotNanF64),
34 String(DbString),
36 Date(jiff::civil::Date),
38 LocalDateTime(jiff::civil::DateTime),
40 ZonedDateTime(jiff::Zoned),
42 LocalTime(jiff::civil::Time),
44 ZonedTime(jiff::Zoned),
46 Duration(DurationOrderKey),
48 Uuid(uuid::Uuid),
50}
51
52impl Ord for CompositeKeyComponent {
53 fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
54 use CompositeKeyComponent as K;
55 match (self, rhs) {
56 (K::Bool(lhs), K::Bool(rhs)) => lhs.cmp(rhs),
57 (K::I64(lhs), K::I64(rhs)) => lhs.cmp(rhs),
58 (K::U64(lhs), K::U64(rhs)) => lhs.cmp(rhs),
59 (K::I128(lhs), K::I128(rhs)) => lhs.cmp(rhs),
60 (K::U128(lhs), K::U128(rhs)) => lhs.cmp(rhs),
61 (K::Decimal(lhs), K::Decimal(rhs)) => lhs.cmp(rhs),
62 (K::F32(lhs), K::F32(rhs)) => lhs.cmp(rhs),
63 (K::F64(lhs), K::F64(rhs)) => lhs.cmp(rhs),
64 (K::String(lhs), K::String(rhs)) => lhs.cmp(rhs),
65 (K::Date(lhs), K::Date(rhs)) => lhs.cmp(rhs),
66 (K::LocalDateTime(lhs), K::LocalDateTime(rhs)) => lhs.cmp(rhs),
67 (K::ZonedDateTime(lhs), K::ZonedDateTime(rhs)) => lhs.cmp(rhs),
68 (K::LocalTime(lhs), K::LocalTime(rhs)) => lhs.cmp(rhs),
69 (K::ZonedTime(lhs), K::ZonedTime(rhs)) => lhs.cmp(rhs),
70 (K::Duration(lhs), K::Duration(rhs)) => lhs.cmp(rhs),
71 (K::Uuid(lhs), K::Uuid(rhs)) => lhs.cmp(rhs),
72 _ => self.discriminant().cmp(&rhs.discriminant()),
73 }
74 }
75}
76
77impl PartialOrd for CompositeKeyComponent {
78 fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
79 Some(self.cmp(rhs))
80 }
81}
82
83impl Hash for CompositeKeyComponent {
84 fn hash<H: Hasher>(&self, state: &mut H) {
85 self.discriminant().hash(state);
86 match self {
87 Self::Bool(value) => value.hash(state),
88 Self::I64(value) => value.hash(state),
89 Self::U64(value) => value.hash(state),
90 Self::I128(value) => value.hash(state),
91 Self::U128(value) => value.hash(state),
92 Self::Decimal(value) => value.hash(state),
93 Self::F32(value) => value.hash(state),
94 Self::F64(value) => value.hash(state),
95 Self::String(value) => value.hash(state),
96 Self::Date(value) => value.hash(state),
97 Self::LocalDateTime(value) => value.hash(state),
98 Self::ZonedDateTime(value) => value.hash(state),
99 Self::LocalTime(value) => value.hash(state),
100 Self::ZonedTime(value) => value.hash(state),
101 Self::Duration(value) => value.hash(state),
102 Self::Uuid(value) => value.hash(state),
103 }
104 }
105}
106
107impl CompositeKeyComponent {
108 const fn discriminant(&self) -> u8 {
109 match self {
110 Self::Bool(_) => 0,
111 Self::I64(_) => 1,
112 Self::U64(_) => 2,
113 Self::I128(_) => 3,
114 Self::U128(_) => 4,
115 Self::Decimal(_) => 5,
116 Self::F32(_) => 6,
117 Self::F64(_) => 7,
118 Self::String(_) => 8,
119 Self::Date(_) => 9,
120 Self::LocalDateTime(_) => 10,
121 Self::ZonedDateTime(_) => 11,
122 Self::LocalTime(_) => 12,
123 Self::ZonedTime(_) => 13,
124 Self::Duration(_) => 14,
125 Self::Uuid(_) => 15,
126 }
127 }
128}
129
130#[derive(Clone, Debug)]
132pub struct CompositeTypedIndex {
133 kinds: SmallVec<[TypedIndexKind; 4]>,
134 entries: BTreeMap<CompositeKey, RoaringBitmap>,
135}
136
137impl CompositeTypedIndex {
138 #[must_use]
140 pub fn new(kinds: SmallVec<[TypedIndexKind; 4]>) -> Self {
141 Self {
142 kinds,
143 entries: BTreeMap::new(),
144 }
145 }
146
147 #[must_use]
149 pub fn kinds(&self) -> &[TypedIndexKind] {
150 &self.kinds
151 }
152
153 #[must_use]
159 pub fn cardinality(&self) -> u64 {
160 self.entries.values().map(RoaringBitmap::len).sum()
161 }
162
163 #[must_use]
171 pub fn distinct_keys(&self) -> u64 {
172 self.entries.len() as u64
173 }
174
175 pub fn entries(&self) -> impl Iterator<Item = (&CompositeKey, &RoaringBitmap)> {
177 self.entries.iter()
178 }
179
180 #[must_use]
187 pub(crate) fn buckets_eq(&self, reference: &Self) -> bool {
188 self.kinds == reference.kinds && self.entries == reference.entries
189 }
190
191 #[must_use]
197 pub(crate) fn has_empty_bucket(&self) -> bool {
198 self.entries.values().any(RoaringBitmap::is_empty)
199 }
200
201 pub fn insert(&mut self, values: &[&Value], row: u32) -> Result<(), CompositeIndexValueError> {
203 let key = self.key_from_values(values)?;
204 self.entries.entry(key).or_default().insert(row);
205 Ok(())
206 }
207
208 pub fn remove(&mut self, values: &[&Value], row: u32) -> Result<(), CompositeIndexValueError> {
210 let key = self.key_from_values(values)?;
211 if let Some(bitmap) = self.entries.get_mut(&key) {
212 bitmap.remove(row);
213 if bitmap.is_empty() {
214 self.entries.remove(&key);
215 }
216 }
217 Ok(())
218 }
219
220 #[must_use]
222 pub fn lookup_key(&self, key: &CompositeKey) -> Option<&RoaringBitmap> {
223 self.entries.get(key)
224 }
225
226 pub fn key_from_values(
233 &self,
234 values: &[&Value],
235 ) -> Result<CompositeKey, CompositeIndexValueError> {
236 composite_key_from_values(&self.kinds, values)
237 }
238
239 pub fn values_share_key(&self, lhs: &[&Value], rhs: &[&Value]) -> bool {
244 match (self.key_from_values(lhs), self.key_from_values(rhs)) {
245 (Ok(lhs_key), Ok(rhs_key)) => lhs_key == rhs_key,
246 (Err(_), Err(_)) => true,
247 _ => false,
248 }
249 }
250}
251
252#[derive(Debug)]
254#[non_exhaustive]
255pub enum CompositeIndexValueError {
256 ArityMismatch {
258 expected: usize,
260 observed: usize,
262 },
263 Component {
265 index: usize,
267 expected_kind: TypedIndexKind,
269 observed: &'static str,
271 },
272}
273
274pub(crate) fn composite_key_from_values(
281 kinds: &[TypedIndexKind],
282 values: &[&Value],
283) -> Result<CompositeKey, CompositeIndexValueError> {
284 if kinds.len() != values.len() {
285 return Err(CompositeIndexValueError::ArityMismatch {
286 expected: kinds.len(),
287 observed: values.len(),
288 });
289 }
290 kinds
291 .iter()
292 .zip(values)
293 .enumerate()
294 .map(|(index, (kind, value))| {
295 component_from_value(*kind, value).map_err(|source| {
296 CompositeIndexValueError::Component {
297 index,
298 expected_kind: source.expected_kind(),
299 observed: source.observed(),
300 }
301 })
302 })
303 .collect()
304}
305
306fn component_from_value(
307 kind: TypedIndexKind,
308 value: &Value,
309) -> Result<CompositeKeyComponent, TypedIndexValueError> {
310 match (kind, value) {
311 (TypedIndexKind::Bool, Value::Bool(value)) => Ok(CompositeKeyComponent::Bool(*value)),
312 (TypedIndexKind::I64, Value::Int(value)) => Ok(CompositeKeyComponent::I64(*value)),
313 (TypedIndexKind::U64, Value::Uint(value)) => Ok(CompositeKeyComponent::U64(*value)),
314 (TypedIndexKind::I128, Value::Int128(value)) => Ok(CompositeKeyComponent::I128(*value)),
315 (TypedIndexKind::U128, Value::Uint128(value)) => Ok(CompositeKeyComponent::U128(*value)),
316 (TypedIndexKind::Decimal, Value::Decimal(value)) => {
317 Ok(CompositeKeyComponent::Decimal(*value))
318 }
319 (TypedIndexKind::F32, Value::Float32(value)) => NotNanF32::new(*value)
320 .map(CompositeKeyComponent::F32)
321 .map_err(|NotNanError| TypedIndexValueError::NaN {
322 expected_kind: TypedIndexKind::F32,
323 }),
324 (TypedIndexKind::F64, Value::Float(value)) => NotNanF64::new(*value)
325 .map(CompositeKeyComponent::F64)
326 .map_err(|NotNanError| TypedIndexValueError::NaN {
327 expected_kind: TypedIndexKind::F64,
328 }),
329 (TypedIndexKind::String, Value::String(value)) => {
330 Ok(CompositeKeyComponent::String(value.clone()))
331 }
332 (TypedIndexKind::Date, Value::Date(value)) => Ok(CompositeKeyComponent::Date(*value)),
333 (TypedIndexKind::LocalDateTime, Value::LocalDateTime(value)) => {
334 Ok(CompositeKeyComponent::LocalDateTime(*value))
335 }
336 (TypedIndexKind::ZonedDateTime, Value::ZonedDateTime(value)) => {
337 Ok(CompositeKeyComponent::ZonedDateTime((**value).clone()))
338 }
339 (TypedIndexKind::LocalTime, Value::LocalTime(value)) => {
340 Ok(CompositeKeyComponent::LocalTime(*value))
341 }
342 (TypedIndexKind::ZonedTime, Value::ZonedTime(value)) => {
343 Ok(CompositeKeyComponent::ZonedTime((**value).clone()))
344 }
345 (TypedIndexKind::Duration, Value::Duration(value)) => {
346 Ok(CompositeKeyComponent::Duration(duration_order_key(value)))
347 }
348 (TypedIndexKind::Uuid, Value::Uuid(value)) => Ok(CompositeKeyComponent::Uuid(*value)),
349 (expected_kind, value) => Err(TypedIndexValueError::KindMismatch {
350 expected_kind,
351 observed: crate::typed_index::observed_value_kind(value),
352 }),
353 }
354}
355
356#[cfg(test)]
357mod tests {
358 use selene_core::db_string;
359 use smallvec::smallvec;
360
361 use super::*;
362
363 fn decimal(value: &str) -> rust_decimal::Decimal {
364 value.parse().expect("test decimal parses")
365 }
366
367 #[test]
368 fn component_from_value_string_kind() {
369 let probe = "component_admit.string.unique-1";
370 let value = Value::String(db_string(probe).unwrap());
371
372 let component =
373 component_from_value(TypedIndexKind::String, &value).expect("string component coerces");
374
375 let CompositeKeyComponent::String(db_string) = component else {
376 panic!("expected String component, got {component:?}");
377 };
378 assert_eq!(db_string.as_str(), probe);
379 }
380
381 #[test]
382 fn component_from_value_bool_kind() {
383 let value = Value::Bool(true);
384
385 let component =
386 component_from_value(TypedIndexKind::Bool, &value).expect("bool component coerces");
387
388 assert_eq!(component, CompositeKeyComponent::Bool(true));
389 }
390
391 #[test]
392 fn component_from_value_u64_kind() {
393 let value = Value::Uint(42);
394
395 let component =
396 component_from_value(TypedIndexKind::U64, &value).expect("u64 component coerces");
397
398 assert_eq!(component, CompositeKeyComponent::U64(42));
399 }
400
401 #[test]
402 fn component_from_value_exact_numeric_kinds() {
403 let signed = Value::Int128(i128::MIN + 42);
404 let unsigned = Value::Uint128(u128::MAX - 42);
405 let amount = Value::Decimal(decimal("42.25"));
406
407 assert_eq!(
408 component_from_value(TypedIndexKind::I128, &signed).expect("i128 component coerces"),
409 CompositeKeyComponent::I128(i128::MIN + 42)
410 );
411 assert_eq!(
412 component_from_value(TypedIndexKind::U128, &unsigned).expect("u128 component coerces"),
413 CompositeKeyComponent::U128(u128::MAX - 42)
414 );
415 assert_eq!(
416 component_from_value(TypedIndexKind::Decimal, &amount)
417 .expect("decimal component coerces"),
418 CompositeKeyComponent::Decimal(decimal("42.25"))
419 );
420 }
421
422 #[test]
423 fn component_from_value_float32_kind() {
424 let value = Value::Float32(1.25_f32);
425
426 let component =
427 component_from_value(TypedIndexKind::F32, &value).expect("f32 component coerces");
428
429 assert_eq!(
430 component,
431 CompositeKeyComponent::F32(NotNanF32::new(1.25_f32).unwrap())
432 );
433 }
434
435 #[test]
436 fn component_from_value_duration_kind() {
437 let value = Value::Duration(Box::new("PT1H2S".parse().unwrap()));
438
439 let component = component_from_value(TypedIndexKind::Duration, &value)
440 .expect("duration component coerces");
441
442 assert_eq!(
443 component,
444 CompositeKeyComponent::Duration(selene_core::duration_order_key(match &value {
445 Value::Duration(value) => value,
446 _ => unreachable!("test value is duration"),
447 }))
448 );
449 }
450
451 #[test]
452 fn composite_key_rejects_when_later_component_kind_mismatches() {
453 let kinds: SmallVec<[TypedIndexKind; 4]> =
454 smallvec![TypedIndexKind::String, TypedIndexKind::I64];
455 let location = Value::String(db_string("composite_admit.left_to_right.loc").unwrap());
456 let bad = Value::String(db_string("composite_admit.left_to_right.bad").unwrap());
459 let refs: Vec<&Value> = vec![&location, &bad];
460
461 let err = composite_key_from_values(&kinds, &refs)
462 .expect_err("tuple kind mismatch on later component rejects whole tuple");
463
464 assert!(matches!(
465 err,
466 CompositeIndexValueError::Component {
467 index: 1,
468 expected_kind: TypedIndexKind::I64,
469 observed: "String",
470 }
471 ));
472 }
473
474 #[test]
475 fn composite_key_from_values_admits_string_component() {
476 let kinds: SmallVec<[TypedIndexKind; 4]> =
477 smallvec![TypedIndexKind::I64, TypedIndexKind::String];
478 let ts = Value::Int(7);
479 let location = Value::String(db_string("composite_admit.string.unique-1").unwrap());
480 let refs: Vec<&Value> = vec![&ts, &location];
481
482 let key = composite_key_from_values(&kinds, &refs).expect("string component coerces");
483
484 assert_eq!(key.len(), 2);
485 }
486
487 #[test]
488 fn values_share_key_matches_equal_string_components() {
489 let index =
490 CompositeTypedIndex::new(smallvec![TypedIndexKind::I64, TypedIndexKind::String]);
491 let ts_lhs = Value::Int(1);
492 let ts_rhs = Value::Int(1);
493 let loc_lhs =
494 Value::String(db_string("values_share_key.composite.string.unique-1").unwrap());
495 let loc_rhs =
496 Value::String(db_string("values_share_key.composite.string.unique-1").unwrap());
497 let lhs: Vec<&Value> = vec![&ts_lhs, &loc_lhs];
498 let rhs: Vec<&Value> = vec![&ts_rhs, &loc_rhs];
499
500 assert!(index.values_share_key(&lhs, &rhs));
501 }
502
503 #[test]
504 fn distinct_keys_counts_composite_buckets_not_rows() {
505 let mut index =
506 CompositeTypedIndex::new(smallvec![TypedIndexKind::I64, TypedIndexKind::String]);
507 assert_eq!(index.distinct_keys(), 0, "empty index");
508
509 let k1 = db_string("k1").unwrap();
510 let v_k1 = Value::String(k1);
511 let one = Value::Int(1);
512 let two = Value::Int(2);
513
514 index.insert(&[&one, &v_k1], 0).unwrap();
516 index.insert(&[&one, &v_k1], 1).unwrap();
517 index.insert(&[&two, &v_k1], 2).unwrap();
518 assert_eq!(index.cardinality(), 3);
519 assert_eq!(index.distinct_keys(), 2);
520
521 index.remove(&[&one, &v_k1], 0).unwrap();
523 assert_eq!(index.cardinality(), 2);
524 assert_eq!(index.distinct_keys(), 2);
525
526 index.remove(&[&one, &v_k1], 1).unwrap();
528 assert_eq!(index.cardinality(), 1);
529 assert_eq!(index.distinct_keys(), 1);
530 }
531
532 #[test]
533 fn values_share_key_returns_false_for_distinct_strings() {
534 let index =
535 CompositeTypedIndex::new(smallvec![TypedIndexKind::I64, TypedIndexKind::String]);
536 let ts_lhs = Value::Int(1);
537 let ts_rhs = Value::Int(1);
538 let loc_lhs = Value::String(db_string("values_share_key.composite.lhs-unique").unwrap());
539 let loc_rhs = Value::String(db_string("values_share_key.composite.rhs-unique").unwrap());
540 let lhs: Vec<&Value> = vec![&ts_lhs, &loc_lhs];
541 let rhs: Vec<&Value> = vec![&ts_rhs, &loc_rhs];
542
543 assert!(!index.values_share_key(&lhs, &rhs));
544 }
545}