use crate::fasthash;
use super::range_map;
/// Consistent-hashing state that maps keys onto a dynamic set of working
/// buckets. The single-letter field names mirror the pseudocode of the
/// AnchorHash paper ("AnchorHash: A Scalable Consistent Hash") — hence the
/// `non_snake_case` allowance.
///
/// NOTE(review): the W/K/L interpretations below are inferred from how
/// `add_bucket`/`remove_bucket`/`get_bucket` in this file use them (and from
/// the paper's pseudocode) — confirm against the paper.
#[derive(Debug, Clone)]
#[allow(non_snake_case)]
pub(crate) struct Anchor {
// Total number of bucket slots (working + reserved).
capacity: u16,
// Per-bucket state: 0 for a working bucket; for a reserved (removed)
// bucket, the working-set size at the time that bucket was removed
// (set in `new` for pre-reserved buckets and in `remove_bucket`).
A: Vec<u16>,
// Stack of reserved buckets: `add_bucket` pops, `remove_bucket` pushes.
R: Vec<u16>,
// Number of currently working buckets.
N: u16,
// Ordering of the working buckets (indexed by L).
W: Vec<u16>,
// Successor map followed by `get_bucket` when re-hashing a key off a
// removed bucket.
K: Vec<u16>,
// Position of each bucket within W.
L: Vec<u16>,
}
impl Anchor {
    /// Creates an anchor with `capacity` total bucket slots, the first
    /// `working` of which start out as working buckets.
    ///
    /// The reserved buckets `working..capacity` are initialized as if they
    /// had been removed in descending order: `A[b] = b` records the
    /// working-set size at that simulated removal, and the reserve stack `R`
    /// is ordered so that bucket `working` is popped first by `add_bucket`.
    ///
    /// # Panics
    /// Panics if `working > capacity`.
    pub(crate) fn new(capacity: u16, working: u16) -> Self {
        assert!(
            working <= capacity,
            "working bucket count must not exceed capacity"
        );
        Self {
            capacity,
            // 0 marks a working bucket; a reserved bucket stores its own
            // index (see above). Built in a single pass instead of a
            // zero-fill followed by a fix-up loop.
            A: (0..capacity)
                .map(|b| if b < working { 0 } else { b })
                .collect(),
            R: (working..capacity).rev().collect(),
            N: working,
            // K, L and W all start as identity permutations. (The original
            // `.into_iter()` calls were redundant: ranges are iterators.)
            K: (0..capacity).collect(),
            L: (0..capacity).collect(),
            W: (0..capacity).collect(),
        }
    }
    /// Maps key `k` to a working bucket (AnchorHash `GetBucket`).
    ///
    /// Starting from a hash over the full capacity: while the chosen bucket
    /// `b` is not working (`A[b] > 0` holds the working-set size at the time
    /// `b` was removed), re-hash the key into that smaller historical range
    /// and follow the `K` successor chain until landing on a bucket that was
    /// still working at that time.
    ///
    /// NOTE(review): assumes at least one working bucket. With `N == 0`
    /// (or a fresh anchor built with `working == 0`) `A[0]` is 0 and bucket
    /// 0 is returned even though it is not working — confirm callers never
    /// query an empty anchor.
    pub(crate) fn get_bucket(&self, k: u32) -> u16 {
        let mut b = range_map(k, self.capacity as u32) as usize;
        while self.A[b] > 0 {
            let bs = fasthash(b as u32, k);
            let mut h = range_map(bs, self.A[b] as u32);
            while self.A[h as usize] >= self.A[b] {
                h = self.K[h as usize] as _;
            }
            b = h as _;
        }
        b as u16
    }
    /// Promotes the most recently reserved bucket back to working status
    /// (AnchorHash `AddBucket`) and returns it, or `None` if the anchor is
    /// already at full capacity (empty reserve stack).
    pub(crate) fn add_bucket(&mut self) -> Option<u16> {
        let b = self.R.pop()? as usize;
        // Mark b working and splice it back into the W/L/K bookkeeping.
        // Statement order matters: L and W read each other's pre-update
        // values.
        self.A[b] = 0;
        self.L[self.W[self.N as usize] as usize] = self.N;
        self.W[self.L[b] as usize] = b as _;
        self.K[b] = b as _;
        self.N += 1;
        Some(b as u16)
    }
    /// Removes working bucket `b` (AnchorHash `RemoveBucket`), recording the
    /// shrunken working-set size in `A[b]` so `get_bucket` can later re-hash
    /// keys landing on `b` into the historical range, and pushing `b` onto
    /// the reserve stack for reuse.
    ///
    /// # Panics
    /// Panics if `b` is not currently a working bucket (`A[b] != 0`).
    pub(crate) fn remove_bucket(&mut self, b: u16) {
        assert_eq!(self.A[b as usize], 0);
        self.R.push(b);
        let b = b as usize;
        self.N -= 1;
        // A[b] remembers how many buckets remain working after this removal.
        self.A[b] = self.N;
        // Unsplice b from the W/L/K bookkeeping; order is significant here
        // as well.
        self.W[self.L[b] as usize] = self.W[self.N as usize];
        self.K[b] = self.W[self.N as usize];
        self.L[self.W[self.N as usize] as usize] = self.L[b];
    }
    /// Test helper: indices of all currently working buckets.
    #[cfg(test)]
    pub(crate) fn working_buckets(&self) -> Vec<u16> {
        // With no working buckets the `A[b] == 0` probe is unreliable: the
        // last-removed bucket had A[b] set to the post-decrement N, i.e. 0
        // (and a fresh anchor with working == 0 has A[0] == 0), so it would
        // be misreported as working. Bail out early instead.
        if self.N == 0 {
            return Vec::new();
        }
        // A working bucket is exactly one whose anchor entry is 0.
        // (The original's needless `let w = …; w` binding is dropped.)
        self.A
            .iter()
            .enumerate()
            .filter(|(_, &v)| v == 0)
            .map(|(i, _)| i as u16)
            .collect()
    }
}
#[cfg(test)]
mod tests {
    use std::{
        cmp::{max, min},
        collections::HashSet,
    };

    use super::*;
    use hashbrown::HashMap;
    use quickcheck_macros::quickcheck;

    /// A zero-working anchor starts with every slot reserved and all
    /// bookkeeping arrays as identity permutations.
    #[test]
    fn test_init_empty() {
        const WANT_SIZE: usize = 20;
        let a = Anchor::new(WANT_SIZE as _, 0);
        assert_eq!(a.A.len(), WANT_SIZE);
        // Every bucket is pre-removed: A[b] stores its own index.
        assert!(a.A.iter().enumerate().all(|(i, &v)| i == v as usize));
        assert_eq!(a.R.len(), WANT_SIZE);
        assert_eq!(a.N, 0);
        assert_eq!(a.K.len(), WANT_SIZE);
        assert_eq!(a.L.len(), WANT_SIZE);
        assert_eq!(a.W.len(), WANT_SIZE);
        for i in 0..WANT_SIZE {
            assert_eq!(a.K[i], i as u16);
            assert_eq!(a.L[i], i as u16);
            assert_eq!(a.W[i], i as u16);
        }
    }

    /// A partially-working anchor marks the first WORKING buckets with
    /// A == 0 and treats the rest as pre-removed.
    #[test]
    fn test_init_populated() {
        const WANT_SIZE: usize = 20;
        const WORKING: usize = 15;
        let a = Anchor::new(WANT_SIZE as _, WORKING as _);
        assert_eq!(a.A.len(), WANT_SIZE);
        assert!(a.A.iter().take(WORKING).all(|&v| v == 0));
        // Reserved buckets store their own index in A. (Loop variables
        // renamed: the original bound the value as `i` and the expected
        // index as `v`, which read backwards.)
        for (&got, want) in a.A.iter().skip(WORKING).zip(WORKING..) {
            assert_eq!(got, want as u16);
        }
        // Reserve stack is ordered so bucket WORKING is popped first.
        assert_eq!(a.R, vec![19, 18, 17, 16, 15]);
        assert_eq!(a.R.len(), WANT_SIZE - WORKING);
        assert_eq!(a.N, WORKING as u16);
        assert_eq!(a.K.len(), WANT_SIZE);
        assert_eq!(a.L.len(), WANT_SIZE);
        assert_eq!(a.W.len(), WANT_SIZE);
        for i in 0..WANT_SIZE {
            assert_eq!(a.K[i], i as u16);
            assert_eq!(a.L[i], i as u16);
            assert_eq!(a.W[i], i as u16);
        }
    }

    /// add_bucket must report failure once every bucket is already working.
    #[test]
    fn test_add_bucket_full_anchor() {
        const SIZE: u16 = 20;
        let mut a = Anchor::new(SIZE, SIZE);
        assert!(
            a.add_bucket().is_none(),
            "adding bucket to full anchor should fail"
        );
    }

    /// Property test: get_bucket only ever returns working buckets, even as
    /// buckets are progressively removed. The last input element doubles as
    /// the bucket count.
    #[quickcheck]
    fn test_get_returns_working_buckets(mut keys: Vec<u16>) -> bool {
        let num_buckets = match keys.pop() {
            // No keys, or a zero-sized anchor: vacuously true.
            None | Some(0) => return true,
            Some(v) => v,
        };
        let mut a = Anchor::new(num_buckets, 0);
        let mut working = HashSet::with_capacity(num_buckets as _);
        for _ in 0..num_buckets {
            working.insert(a.add_bucket().unwrap());
        }
        for k in keys {
            let got = a.get_bucket(k as _);
            if !working.contains(&got) {
                return false;
            }
            // Keep at least one working bucket so lookups stay defined.
            if working.len() > 1 {
                a.remove_bucket(got);
                assert!(working.remove(&got));
            }
        }
        true
    }

    /// Keys should spread roughly evenly across the working buckets.
    ///
    /// BUG FIX: the original initialized `got_min` to 0, so the minimum hit
    /// count was never actually tracked, and the final comparison was
    /// inverted relative to its panic message (it panicked when the buckets
    /// WERE balanced) — together the check could never fire. The balance
    /// assertion is now real; the tolerance is widened from 10% to a 1.5x
    /// max/min ratio because with 10_000 keys over 10 buckets the expected
    /// multinomial spread alone approaches 10%, which would make a 10%
    /// bound flaky.
    #[test]
    fn test_bucket_balance() {
        use rand::prelude::*;
        let mut rng = rand::thread_rng();
        const WORKING_BUCKETS: u16 = 10;
        const KEYS: usize = 10_000;
        let a = Anchor::new(200, WORKING_BUCKETS);
        let mut seen = HashMap::new();
        for _ in 0..KEYS {
            let k = rng.gen();
            *seen.entry(a.get_bucket(k)).or_insert(0) += 1;
        }
        // Every working bucket should receive at least one key.
        assert_eq!(seen.len(), WORKING_BUCKETS as _);
        let mut got_min = i32::MAX;
        let mut got_max = i32::MIN;
        for (bucket, hits) in seen.into_iter() {
            println!("bucket {} has {} hits", bucket, hits);
            got_min = min(hits, got_min);
            got_max = max(hits, got_max);
        }
        assert!(
            f64::from(got_max) <= f64::from(got_min) * 1.5,
            "expected max bucket hits ({}) to be within 1.5x of min bucket hits ({})",
            got_max,
            got_min
        );
    }
}