use std::collections::HashMap;
use std::fmt::Debug;
use std::hash::Hash;
use rand::Rng;
fn google_jump_hash(mut key: u64, num_buckets: i32) -> i32 {
if num_buckets < 1 {
panic!("num_buckets must be a positive integer, got {}", num_buckets);
}
let mut b: i64 = -1; let mut j: i64 = 0;
while j < num_buckets as i64 {
b = j; key = key.wrapping_mul(2862933555777941757).wrapping_add(1);
let dividend = (1i64 << 31) as f64; let divisor = ((key >> 33) + 1) as f64;
let jump_ratio = dividend / divisor;
j = ((b + 1) as f64 * jump_ratio) as i64;
if j < 0 {
j = num_buckets as i64; }
}
b as i32
}
const COMPACT_HASH_MULTIPLIER: u64 = 0xc6a4a7935bd1e995;
#[derive(Debug)]
struct LooseHolder<T: Eq + Hash + Clone> {
a: Vec<Option<T>>,
m: HashMap<T, usize>,
f: Vec<usize>,
}
impl<T: Eq + Hash + Clone> LooseHolder<T> {
fn new() -> Self {
LooseHolder {
a: Vec::new(),
m: HashMap::new(),
f: Vec::new(),
}
}
fn add(&mut self, obj: T) {
if self.m.contains_key(&obj) {
return;
}
if let Some(idx) = self.f.pop() {
self.a[idx] = Some(obj.clone());
self.m.insert(obj, idx);
} else {
self.a.push(Some(obj.clone()));
self.m.insert(obj, self.a.len() - 1);
}
}
fn remove(&mut self, obj: &T) {
if let Some(idx) = self.m.remove(obj) {
self.a[idx] = None;
self.f.push(idx);
}
}
fn get(&self, key: u64) -> Option<T> {
if self.a.is_empty() {
return None;
}
let num_buckets = self.a.len() as i32;
let hash_idx = google_jump_hash(key, num_buckets) as usize;
self.a[hash_idx].as_ref().cloned()
}
fn shrink(&mut self) {
if self.f.is_empty() {
return;
}
let mut new_a: Vec<Option<T>> = Vec::with_capacity(self.m.len());
let mut new_m: HashMap<T, usize> = HashMap::with_capacity(self.m.len());
for opt_item_t in std::mem::take(&mut self.a) {
if let Some(item_t) = opt_item_t {
let new_idx = new_a.len();
new_a.push(Some(item_t.clone())); new_m.insert(item_t, new_idx); }
}
self.a = new_a;
self.m = new_m;
self.f.clear(); }
}
#[derive(Debug)]
struct CompactHolder<T: Eq + Hash + Clone> {
a: Vec<T>,
m: HashMap<T, usize>,
}
impl<T: Eq + Hash + Clone> CompactHolder<T> {
fn new() -> Self {
CompactHolder {
a: Vec::new(),
m: HashMap::new(),
}
}
fn add(&mut self, obj: T) {
if self.m.contains_key(&obj) {
return;
}
self.a.push(obj.clone());
self.m.insert(obj, self.a.len() - 1);
}
fn remove(&mut self, obj: &T) {
if let Some(&idx) = self.m.get(obj) {
self.m.remove(obj);
let _swapped_out_obj = self.a.swap_remove(idx);
if idx < self.a.len() {
let moved_element = &self.a[idx]; self.m.insert(moved_element.clone(), idx);
}
}
}
fn get(&self, key: u64) -> Option<T> {
if self.a.is_empty() {
return None;
}
let num_buckets = self.a.len() as i32;
let h_key = key.wrapping_mul(COMPACT_HASH_MULTIPLIER);
let hash_idx = google_jump_hash(h_key, num_buckets) as usize;
self.a.get(hash_idx).cloned()
}
}
#[derive(Debug)]
pub struct DoubleJumpHash<T: Eq + Hash + Clone> {
loose: LooseHolder<T>,
compact: CompactHolder<T>,
}
impl<T: Eq + Hash + Clone + Debug> DoubleJumpHash<T> {
pub fn new() -> Self {
DoubleJumpHash {
loose: LooseHolder::new(),
compact: CompactHolder::new(),
}
}
pub fn add(&mut self, obj: T) {
self.loose.add(obj.clone());
self.compact.add(obj);
}
pub fn remove(&mut self, obj: &T) {
self.loose.remove(obj);
self.compact.remove(obj);
}
pub fn len(&self) -> usize {
self.compact.a.len()
}
pub fn loose_len(&self) -> usize {
self.loose.a.len()
}
pub fn shrink(&mut self) {
self.loose.shrink();
}
pub fn get(&self, key: u64) -> Option<T> {
if self.loose.a.is_empty() {
return self.compact.get(key);
}
if let Some(obj) = self.loose.get(key) {
Some(obj)
} else {
self.compact.get(key)
}
}
pub fn all(&self) -> Vec<T> {
self.compact.a.clone()
}
pub fn random(&self) -> Option<T> {
if self.compact.a.is_empty() {
None
} else {
let mut rng = rand::rng();
let idx = rng.random_range(0..self.compact.a.len());
self.compact.a.get(idx).cloned()
}
}
}
impl<T: Eq + Hash + Clone + Debug> Default for DoubleJumpHash<T> {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
fn invariant_check<T: Eq + Hash + Clone + Debug>(h: &DoubleJumpHash<T>) {
assert_eq!(
h.loose.a.len(),
h.loose.m.len() + h.loose.f.len(),
"Loose holder: a.len != m.len + f.len"
);
assert_eq!(h.compact.a.len(), h.compact.m.len(), "Compact holder: a.len != m.len");
for (item, &idx) in &h.loose.m {
assert!(idx < h.loose.a.len(), "Loose map index out of bounds");
assert_eq!(h.loose.a[idx].as_ref(), Some(item), "Loose map item mismatch in 'a'");
}
let mut free_slots_set = std::collections::HashSet::new();
for &idx in &h.loose.f {
assert!(idx < h.loose.a.len(), "Loose free list index out of bounds");
assert!(h.loose.a[idx].is_none(), "Loose free list slot not None");
assert!(free_slots_set.insert(idx), "Duplicate index in free list");
}
for (item, &idx) in &h.compact.m {
assert!(idx < h.compact.a.len(), "Compact map index out of bounds");
assert_eq!(&h.compact.a[idx], item, "Compact map item mismatch in 'a'");
}
assert_eq!(h.all().len(), h.len(), "all().len() != len()");
}
#[test]
fn test_hash_new() {
let h: DoubleJumpHash<i32> = DoubleJumpHash::new();
assert_eq!(h.len(), 0);
assert_eq!(h.loose_len(), 0);
invariant_check(&h);
}
#[test]
fn test_hash_add_basic() {
let mut h = DoubleJumpHash::new();
h.add(100);
assert_eq!(h.len(), 1);
assert_eq!(h.loose_len(), 1);
invariant_check(&h);
h.add(200);
assert_eq!(h.len(), 2);
assert_eq!(h.loose_len(), 2);
invariant_check(&h);
h.add(100); assert_eq!(h.len(), 2);
invariant_check(&h);
}
#[test]
fn test_hash_remove_basic() {
let mut h = DoubleJumpHash::new();
h.add(100);
h.add(200);
h.add(300);
invariant_check(&h);
h.remove(&200);
assert_eq!(h.len(), 2);
assert_eq!(h.loose_len(), 3);
invariant_check(&h);
let all_items = h.all();
assert!(!all_items.contains(&200));
h.remove(&100);
assert_eq!(h.len(), 1);
assert_eq!(h.loose_len(), 3);
invariant_check(&h);
h.remove(&300);
assert_eq!(h.len(), 0);
assert_eq!(h.loose_len(), 3);
invariant_check(&h);
}
#[test]
fn test_hash_get_basic() {
let mut h = DoubleJumpHash::new();
assert!(h.get(123).is_none());
let nodes: Vec<String> = (0..10).map(|i| format!("node{}", i)).collect();
for node in nodes.iter() {
h.add(node.clone());
}
invariant_check(&h);
assert_eq!(h.len(), 10);
for i in 0..30 {
assert!(h.get(i as u64).is_some(), "Expected Some for key {}", i);
}
h.remove(&"node5".to_string());
h.remove(&"node0".to_string());
invariant_check(&h);
assert_eq!(h.len(), 8);
for i in 0..30 {
assert!(h.get(i as u64).is_some(), "Expected Some for key {} after removals", i);
}
for node in nodes.iter() {
h.remove(node);
}
invariant_check(&h);
assert_eq!(h.len(), 0);
assert!(h.get(123).is_none(), "Expected None after all removed");
}
#[test]
fn test_hash_shrink() {
let mut h = DoubleJumpHash::new();
for i in 0..10 {
h.add(i);
}
invariant_check(&h);
for i in 0..5 {
h.remove(&i);
}
assert_eq!(h.len(), 5);
assert_eq!(h.loose_len(), 10);
invariant_check(&h);
h.shrink();
assert_eq!(h.len(), 5);
assert_eq!(h.loose_len(), 5);
assert_eq!(h.loose.f.len(), 0);
invariant_check(&h);
}
#[test]
fn example_usage() {
let mut h = DoubleJumpHash::new();
for i in 0..10 {
h.add(format!("node{}", i));
}
println!("Len: {}", h.len());
println!("LooseLen: {}", h.loose_len());
assert_eq!(h.len(), 10);
assert_eq!(h.loose_len(), 10);
let g1 = h.get(1000);
let g2 = h.get(2000);
let g3 = h.get(3000);
println!("Get(1000): {:?}", g1); println!("Get(2000): {:?}", g2); println!("Get(3000): {:?}", g3);
assert_eq!(g1.unwrap(), "node9".to_string());
assert_eq!(g2.unwrap(), "node2".to_string());
assert_eq!(g3.unwrap(), "node3".to_string());
h.remove(&"node3".to_string());
println!("Len after remove: {}", h.len());
println!("LooseLen after remove: {}", h.loose_len());
assert_eq!(h.len(), 9);
assert_eq!(h.loose_len(), 10);
let g4 = h.get(1000);
let g5 = h.get(2000);
let g6 = h.get(3000);
println!("Get(1000) after remove: {:?}", g4); println!("Get(2000) after remove: {:?}", g5); println!("Get(3000) after remove: {:?}", g6);
assert_eq!(g4.unwrap(), "node9".to_string());
assert_eq!(g5.unwrap(), "node2".to_string());
assert_eq!(g6.unwrap(), "node0".to_string());
}
#[test]
#[should_panic(expected = "num_buckets must be a positive integer, got 0")]
fn google_jump_hash_zero_buckets() {
google_jump_hash(123, 0);
}
#[test]
#[should_panic(expected = "num_buckets must be a positive integer, got -5")]
fn google_jump_hash_negative_buckets() {
google_jump_hash(123, -5);
}
#[test]
fn google_jump_hash_properties() {
let num_buckets = 10;
for k in 0..1000 {
let bucket = google_jump_hash(k, num_buckets);
assert!(bucket >= 0 && bucket < num_buckets);
}
assert_eq!(google_jump_hash(12345, 50), google_jump_hash(12345, 50));
let key = 789;
let b10 = google_jump_hash(key, 10);
let b11 = google_jump_hash(key, 11);
assert!(b11 == b10 || b11 == 10); }
}