use std::collections::BTreeSet;
use thiserror::Error;
#[derive(Debug, Error, PartialEq)]
pub enum RandomSlicesError {
#[error("random_slicing: at least one claimant is required")]
EmptyClaimants,
#[error("random_slicing: duplicate claimant '{name}'")]
DuplicateClaimant {
name: String,
},
#[error("random_slicing: invalid weight for '{name}': {weight}")]
InvalidWeight {
name: String,
weight: f64,
},
#[error("random_slicing: claimant size sum {sum} exceeds u64::MAX")]
OversizedSizeSum {
sum: u128,
},
#[error("random_slicing: zero-sized interval at index {index} for '{name}'")]
ZeroInterval {
index: usize,
name: String,
},
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RandomSlices {
lower_bounds: Vec<u64>,
interval_sizes: Vec<u64>,
claimants: Vec<String>,
}
impl RandomSlices {
pub fn from_uniform(claimants: &[&str]) -> Result<Self, RandomSlicesError> {
if claimants.is_empty() {
return Err(RandomSlicesError::EmptyClaimants);
}
check_unique(claimants.iter().copied())?;
let n = claimants.len() as u64;
let base = u64::MAX / n;
let used = base.checked_mul(n).unwrap_or(0);
let remainder = u64::MAX - used;
let mut sizes: Vec<u64> = vec![base; claimants.len()];
if let Some(last) = sizes.last_mut() {
*last = last.saturating_add(remainder);
}
let lower_bounds = build_lower_bounds(&sizes);
Ok(Self {
lower_bounds,
interval_sizes: sizes,
claimants: claimants.iter().map(|s| (*s).to_string()).collect(),
})
}
pub fn from_weights(
weights: &[(String, f64)],
decimal_digits: usize,
) -> Result<Self, RandomSlicesError> {
if weights.is_empty() {
return Err(RandomSlicesError::EmptyClaimants);
}
check_unique(weights.iter().map(|(n, _)| n.as_str()))?;
let scale = 10f64.powi(i32::try_from(decimal_digits).unwrap_or(0));
let mut rounded: Vec<f64> = Vec::with_capacity(weights.len());
for (name, w) in weights {
if !w.is_finite() || *w < 0.0 {
return Err(RandomSlicesError::InvalidWeight {
name: name.clone(),
weight: *w,
});
}
let r = (w * scale).round() / scale;
rounded.push(r);
}
let sum: f64 = rounded.iter().sum();
if !sum.is_finite() || sum <= 0.0 {
let first = weights[0].clone();
return Err(RandomSlicesError::InvalidWeight {
name: first.0,
weight: first.1,
});
}
#[allow(clippy::cast_precision_loss)]
let max_f = u64::MAX as f64;
let mut sizes: Vec<u64> = Vec::with_capacity(weights.len());
let last_idx = weights.len() - 1;
let mut consumed: u128 = 0;
for (idx, _) in weights.iter().enumerate() {
if idx == last_idx {
let mut last = u128::from(u64::MAX).saturating_sub(consumed);
if last == 0 {
last = 1;
}
let last_u64 = u64::try_from(last.min(u128::from(u64::MAX))).unwrap_or(u64::MAX);
sizes.push(last_u64);
break;
}
let frac = rounded[idx] / sum;
let raw = max_f * frac;
let size: u64 = if raw <= 0.0 {
0
} else if raw >= max_f {
u64::MAX
} else {
raw as u64
};
consumed = consumed.saturating_add(u128::from(size));
sizes.push(size);
}
if sizes.iter().all(|s| *s == 0) {
let first = weights[0].clone();
return Err(RandomSlicesError::InvalidWeight {
name: first.0,
weight: first.1,
});
}
let mut promoted_zeros: u64 = 0;
for (idx, s) in sizes.iter_mut().enumerate() {
if *s == 0 && idx != last_idx {
*s = 1;
promoted_zeros = promoted_zeros.saturating_add(1);
}
}
if promoted_zeros > 0 {
if let Some(last) = sizes.last_mut() {
*last = last.saturating_sub(promoted_zeros).max(1);
}
}
let pairs: Vec<(String, u64)> = weights
.iter()
.zip(sizes.iter())
.map(|((n, _), s)| (n.clone(), *s))
.collect();
Self::from_sizes(&pairs)
}
pub fn from_sizes(sizes: &[(String, u64)]) -> Result<Self, RandomSlicesError> {
if sizes.is_empty() {
return Err(RandomSlicesError::EmptyClaimants);
}
check_unique(sizes.iter().map(|(n, _)| n.as_str()))?;
let mut sum: u128 = 0;
for (idx, (name, s)) in sizes.iter().enumerate() {
if *s == 0 {
return Err(RandomSlicesError::ZeroInterval {
index: idx,
name: name.clone(),
});
}
sum += u128::from(*s);
if sum > u128::from(u64::MAX) {
return Err(RandomSlicesError::OversizedSizeSum { sum });
}
}
let mut interval_sizes: Vec<u64> = sizes.iter().map(|(_, s)| *s).collect();
let used: u128 = sum;
let remainder = u128::from(u64::MAX) - used;
if remainder > 0 {
if let Some(last) = interval_sizes.last_mut() {
let bumped = u128::from(*last) + remainder;
*last = u64::try_from(bumped).expect("invariant: remainder fits");
}
}
let lower_bounds = build_lower_bounds(&interval_sizes);
Ok(Self {
lower_bounds,
interval_sizes,
claimants: sizes.iter().map(|(n, _)| n.clone()).collect(),
})
}
#[must_use]
pub fn len(&self) -> usize {
self.claimants.len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.claimants.is_empty()
}
#[must_use]
pub fn claimant_for(&self, hash: u64) -> Option<&str> {
let idx = self.index_for(hash)?;
self.claimants.get(idx).map(String::as_str)
}
#[must_use]
pub fn index_for(&self, hash: u64) -> Option<usize> {
let n = self.lower_bounds.len();
if n == 0 {
return None;
}
let last = n - 1;
if hash >= self.lower_bounds[last] {
return Some(last);
}
let mut lo = 0usize;
let mut hi = last;
while lo < hi {
let mid = lo + (hi - lo).div_ceil(2);
if self.lower_bounds[mid] <= hash {
lo = mid;
} else {
hi = mid - 1;
}
}
Some(lo)
}
#[must_use]
pub fn claimants(&self) -> &[String] {
&self.claimants
}
#[must_use]
pub fn lower_bounds(&self) -> &[u64] {
&self.lower_bounds
}
#[must_use]
pub fn interval_sizes(&self) -> &[u64] {
&self.interval_sizes
}
pub fn intervals(&self) -> impl Iterator<Item = (&str, u64, u64)> + '_ {
self.claimants
.iter()
.zip(self.lower_bounds.iter().copied())
.zip(self.interval_sizes.iter().copied())
.map(|((name, lb), sz)| (name.as_str(), lb, sz))
}
}
fn build_lower_bounds(sizes: &[u64]) -> Vec<u64> {
let mut lb: Vec<u64> = Vec::with_capacity(sizes.len());
let mut acc: u64 = 0;
for s in sizes {
lb.push(acc);
acc = acc.saturating_add(*s);
}
lb
}
fn check_unique<'a, I: IntoIterator<Item = &'a str>>(it: I) -> Result<(), RandomSlicesError> {
let mut seen: BTreeSet<&str> = BTreeSet::new();
for name in it {
if !seen.insert(name) {
return Err(RandomSlicesError::DuplicateClaimant {
name: name.to_string(),
});
}
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn empty_input_rejected() {
assert_eq!(
RandomSlices::from_uniform(&[]),
Err(RandomSlicesError::EmptyClaimants)
);
let empty_sizes: Vec<(String, u64)> = Vec::new();
assert_eq!(
RandomSlices::from_sizes(&empty_sizes),
Err(RandomSlicesError::EmptyClaimants)
);
let empty_weights: Vec<(String, f64)> = Vec::new();
assert_eq!(
RandomSlices::from_weights(&empty_weights, 2),
Err(RandomSlicesError::EmptyClaimants)
);
}
#[test]
fn duplicate_rejected() {
let err = RandomSlices::from_uniform(&["a", "a"]).unwrap_err();
assert!(matches!(err, RandomSlicesError::DuplicateClaimant { .. }));
}
#[test]
fn uniform_partition_covers_full_ring() {
let s = RandomSlices::from_uniform(&["a", "b", "c", "d"]).unwrap();
assert_eq!(s.len(), 4);
let total: u128 = s.interval_sizes().iter().map(|&n| u128::from(n)).sum();
assert_eq!(total, u128::from(u64::MAX));
assert_eq!(s.lower_bounds()[0], 0);
for w in s.lower_bounds().windows(2) {
assert!(w[0] < w[1]);
}
}
#[test]
fn lookup_at_extreme_values_works() {
let s = RandomSlices::from_uniform(&["a", "b", "c", "d"]).unwrap();
assert_eq!(s.claimant_for(0).unwrap(), "a");
assert_eq!(s.claimant_for(u64::MAX).unwrap(), "d");
}
#[test]
fn lookup_at_boundaries_picks_upper_interval() {
let s = RandomSlices::from_uniform(&["a", "b"]).unwrap();
let split = s.lower_bounds()[1];
assert_eq!(s.claimant_for(split).unwrap(), "b");
assert_eq!(s.claimant_for(split - 1).unwrap(), "a");
}
#[test]
fn weights_translate_to_proportional_sizes() {
let pairs = vec![("a".to_string(), 1.0), ("b".to_string(), 3.0)];
let s = RandomSlices::from_weights(&pairs, 2).unwrap();
assert_eq!(s.len(), 2);
let a_size = u128::from(s.interval_sizes()[0]);
let b_size = u128::from(s.interval_sizes()[1]);
#[allow(clippy::cast_precision_loss)]
let ratio = (b_size as f64) / (a_size as f64);
assert!(
ratio > 2.9 && ratio < 3.1,
"expected ~3x ratio, got {ratio}"
);
}
#[test]
fn weights_reject_negative() {
let pairs = vec![("a".to_string(), -1.0)];
let err = RandomSlices::from_weights(&pairs, 2).unwrap_err();
assert!(matches!(err, RandomSlicesError::InvalidWeight { .. }));
}
#[test]
fn weights_reject_non_finite() {
let pairs = vec![("a".to_string(), f64::NAN)];
let err = RandomSlices::from_weights(&pairs, 2).unwrap_err();
assert!(matches!(err, RandomSlicesError::InvalidWeight { .. }));
}
#[test]
fn weights_reject_all_zero_rounded() {
let pairs = vec![("a".to_string(), 0.001), ("b".to_string(), 0.001)];
let err = RandomSlices::from_weights(&pairs, 2).unwrap_err();
assert!(matches!(err, RandomSlicesError::InvalidWeight { .. }));
}
#[test]
fn sizes_reject_zero_interval() {
let pairs = vec![("a".to_string(), 0u64)];
let err = RandomSlices::from_sizes(&pairs).unwrap_err();
assert!(matches!(err, RandomSlicesError::ZeroInterval { .. }));
}
#[test]
fn sizes_reject_overflow() {
let pairs = vec![("a".to_string(), u64::MAX), ("b".to_string(), 1u64)];
let err = RandomSlices::from_sizes(&pairs).unwrap_err();
assert!(matches!(err, RandomSlicesError::OversizedSizeSum { .. }));
}
#[test]
fn round_trip_1k_random_keys_under_5_percent() {
use crate::hashkit::{hash64, HashType};
let s = RandomSlices::from_uniform(&["a", "b", "c", "d"]).unwrap();
let mut counts = [0u32; 4];
let total = 1024usize;
for i in 0..total {
let key = format!("key-{i:08x}");
let h = hash64(HashType::Murmur3X64_64, key.as_bytes());
let idx = s.index_for(h).unwrap();
counts[idx] += 1;
}
let expected = (total / 4) as u32;
for (i, &c) in counts.iter().enumerate() {
let lo = expected - expected / 4;
let hi = expected + expected / 4;
assert!(
c >= lo && c <= hi,
"claimant {i}: count {c} outside [{lo}, {hi}]"
);
}
}
}