algos/cs/hashing/
separate.rs

1//! # Separate Chaining Hash Table
2//!
3//! This module implements a **HashMap** using **separate chaining** in Rust, aimed at production-like usage.
4//! It supports:
5//! - **Generic** key-value pairs (`K: Hash + Eq, V`).
6//! - **Customizable** capacity growth and load factor threshold.
7//! - **Configurable** hasher using `BuildHasher` traits, with user-defined or random seeds.
8//! - **Insert**, **get**, **remove**, **iter** (basic iteration) operations with expected **O(1)** average performance.
9//!
10//! This is not a standard library replacement but is designed as a robust demonstration for real usage.
11
12use std::collections::hash_map::RandomState;
13use std::hash::{BuildHasher, Hash};
14
15/// Default initial capacity if none specified.
16const DEFAULT_INITIAL_CAPACITY: usize = 16;
17
18/// Default maximum load factor before resizing.
19const DEFAULT_MAX_LOAD_FACTOR: f64 = 0.75;
20
21/// A single entry in a chain: `(K, V)`.
22#[derive(Debug)]
23struct Entry<K, V> {
24    key: K,
25    value: V,
26}
27
28/// A "bucket" is a vector of entries for separate chaining.
29type Bucket<K, V> = Vec<Entry<K, V>>;
30
31/// A separate-chaining HashMap with generic `K, V` and a customizable hasher.
32#[derive(Debug)]
33pub struct ChainedHashMap<K, V, S = RandomState> {
34    buckets: Vec<Bucket<K, V>>,
35    /// The number of stored key-value pairs.
36    len: usize,
37    /// The capacity in terms of how many buckets we have.
38    bucket_count: usize,
39    /// The maximum load factor (ratio = len / bucket_count).
40    max_load_factor: f64,
41    /// Hasher builder.
42    build_hasher: S,
43}
44
45/// A builder for the `ChainedHashMap`.
46/// Typically you'll call `.with_hasher(...)`, `.with_capacity(...)`, etc., then `.build()`.
47#[derive(Debug)]
48pub struct ChainedHashMapBuilder<S> {
49    capacity: usize,
50    max_load_factor: f64,
51    hasher: S,
52}
53
54impl Default for ChainedHashMapBuilder<RandomState> {
55    fn default() -> Self {
56        Self {
57            capacity: DEFAULT_INITIAL_CAPACITY,
58            max_load_factor: DEFAULT_MAX_LOAD_FACTOR,
59            hasher: RandomState::new(),
60        }
61    }
62}
63
64impl ChainedHashMapBuilder<RandomState> {
65    /// Creates a new builder with default capacity and default hasher (RandomState).
66    pub fn new() -> Self {
67        Default::default()
68    }
69}
70
71impl<S: BuildHasher + Clone> ChainedHashMapBuilder<S> {
72    /// Sets an explicit capacity (number of buckets).
73    /// The actual data capacity for key-values is unbounded; we just store them in buckets.
74    pub fn with_capacity(mut self, capacity: usize) -> Self {
75        self.capacity = capacity.max(1);
76        self
77    }
78
79    /// Sets the maximum load factor. If `len / bucket_count` exceeds it, we resize.
80    pub fn with_max_load_factor(mut self, lf: f64) -> Self {
81        assert!(lf > 0.0, "Load factor must be > 0");
82        self.max_load_factor = lf;
83        self
84    }
85
86    /// Sets a custom hasher builder.
87    pub fn with_hasher<T: BuildHasher + Clone>(self, hasher: T) -> ChainedHashMapBuilder<T> {
88        ChainedHashMapBuilder {
89            capacity: self.capacity,
90            max_load_factor: self.max_load_factor,
91            hasher,
92        }
93    }
94
95    /// Build the final `ChainedHashMap`.
96    pub fn build<K: Hash + Eq, V>(self) -> ChainedHashMap<K, V, S> {
97        let bucket_count = self.capacity;
98        let mut buckets = Vec::with_capacity(bucket_count);
99        buckets.resize_with(bucket_count, Default::default);
100
101        ChainedHashMap {
102            buckets,
103            len: 0,
104            bucket_count,
105            max_load_factor: self.max_load_factor,
106            build_hasher: self.hasher,
107        }
108    }
109}
110
111impl<K: Hash + Eq, V> ChainedHashMap<K, V> {
112    /// Creates a new map with default capacity and default hasher.
113    pub fn new() -> Self {
114        ChainedHashMapBuilder::new().build()
115    }
116
117    /// Creates a new map with a specified initial capacity and default hasher.
118    pub fn with_capacity(cap: usize) -> Self {
119        ChainedHashMapBuilder::new().with_capacity(cap).build()
120    }
121}
122
123impl<K: Hash + Eq, V, S: BuildHasher + Clone> ChainedHashMap<K, V, S> {
124    /// Returns the number of key-value pairs in the map.
125    pub fn len(&self) -> usize {
126        self.len
127    }
128
129    /// Returns true if the map is empty.
130    pub fn is_empty(&self) -> bool {
131        self.len == 0
132    }
133
134    /// Inserts a key-value pair into the map.
135    /// If the key already exists, its value is replaced and the old value returned.
136    pub fn insert(&mut self, key: K, value: V) -> Option<V> {
137        // If load factor exceeded, grow
138        if (self.len + 1) as f64 / (self.bucket_count as f64) > self.max_load_factor {
139            self.resize();
140        }
141
142        let bucket_index = self.bucket_index(&key);
143        let bucket = &mut self.buckets[bucket_index];
144
145        // Check if key exists
146        for entry in bucket.iter_mut() {
147            if entry.key == key {
148                let oldv = std::mem::replace(&mut entry.value, value);
149                return Some(oldv);
150            }
151        }
152        // Insert
153        bucket.push(Entry { key, value });
154        self.len += 1;
155        None
156    }
157
158    /// Returns a reference to the value corresponding to the key, if present.
159    pub fn get(&self, key: &K) -> Option<&V> {
160        let idx = self.bucket_index(key);
161        for entry in &self.buckets[idx] {
162            if &entry.key == key {
163                return Some(&entry.value);
164            }
165        }
166        None
167    }
168
169    /// Returns a mutable reference to the value corresponding to the key, if present.
170    pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
171        let idx = self.bucket_index(key);
172        for entry in &mut self.buckets[idx] {
173            if &entry.key == key {
174                return Some(&mut entry.value);
175            }
176        }
177        None
178    }
179
180    /// Removes and returns the value for the specified key, if present.
181    pub fn remove(&mut self, key: &K) -> Option<V> {
182        let idx = self.bucket_index(key);
183        let bucket = &mut self.buckets[idx];
184        let mut i = 0;
185        while i < bucket.len() {
186            if &bucket[i].key == key {
187                let entry = bucket.swap_remove(i);
188                self.len -= 1;
189                return Some(entry.value);
190            }
191            i += 1;
192        }
193        None
194    }
195
196    /// Clears the map, removing all key-value pairs.
197    pub fn clear(&mut self) {
198        for bucket in &mut self.buckets {
199            bucket.clear();
200        }
201        self.len = 0;
202    }
203
204    /// Returns an iterator over the key-value pairs in the map.
205    pub fn iter(&self) -> impl Iterator<Item = (&K, &V)> {
206        self.buckets
207            .iter()
208            .flat_map(|bucket| bucket.iter().map(|entry| (&entry.key, &entry.value)))
209    }
210
211    /// Internal function computing the bucket index for a given key.
212    fn bucket_index(&self, key: &K) -> usize {
213        (self.build_hasher.hash_one(key) as usize) % self.bucket_count
214    }
215
216    /// Resize the map (roughly doubling the number of buckets) and re-insert existing entries.
217    fn resize(&mut self) {
218        let new_bucket_count = (self.bucket_count * 2).max(1);
219        let mut new_buckets: Vec<Bucket<K, V>> = Vec::with_capacity(new_bucket_count);
220        new_buckets.resize_with(new_bucket_count, Default::default);
221
222        // We'll re-insert everything
223        for bucket in self.buckets.drain(..) {
224            for entry in bucket {
225                let h = self.build_hasher.hash_one(&entry.key) as usize % new_bucket_count;
226                new_buckets[h].push(entry);
227            }
228        }
229        self.bucket_count = new_bucket_count;
230        self.buckets = new_buckets;
231        // len remains the same
232    }
233}
234
235impl<K: Hash + Eq, V> Default for ChainedHashMap<K, V> {
236    fn default() -> Self {
237        Self::new()
238    }
239}
240
241// Additional convenience methods or iter types can be added if needed for production usage.
242
243#[cfg(test)]
244mod tests {
245    use super::*;
246
247    #[test]
248    fn basic_insert_get_remove() {
249        let mut map = ChainedHashMap::with_capacity(4);
250        assert_eq!(map.len(), 0);
251        assert!(map.is_empty());
252
253        // Insert
254        let old = map.insert("foo", 123);
255        assert_eq!(old, None);
256        assert_eq!(map.len(), 1);
257        assert!(!map.is_empty());
258
259        // Insert second
260        let old = map.insert("bar", 999);
261        assert_eq!(old, None);
262        assert_eq!(map.len(), 2);
263
264        // Insert existing
265        let old = map.insert("foo", 456);
266        assert_eq!(old, Some(123));
267        assert_eq!(map.len(), 2);
268
269        // get
270        assert_eq!(map.get(&"foo"), Some(&456));
271        assert_eq!(map.get(&"bar"), Some(&999));
272        assert_eq!(map.get(&"baz"), None);
273
274        // remove
275        let rm = map.remove(&"bar");
276        assert_eq!(rm, Some(999));
277        assert_eq!(map.len(), 1);
278        assert_eq!(map.get(&"bar"), None);
279    }
280
281    #[test]
282    fn test_resize() {
283        let mut map = ChainedHashMap::with_capacity(2);
284        for i in 0..10 {
285            map.insert(format!("key{}", i), i);
286        }
287        for i in 0..10 {
288            assert_eq!(map.get(&format!("key{}", i)), Some(&i));
289        }
290    }
291
292    #[test]
293    fn test_iter() {
294        let mut map = ChainedHashMap::new();
295        map.insert("one", 1);
296        map.insert("two", 2);
297        map.insert("three", 3);
298
299        let mut items: Vec<_> = map.iter().map(|(k, v)| (*k, *v)).collect();
300        items.sort_by_key(|x| x.0);
301        assert_eq!(items, vec![("one", 1), ("three", 3), ("two", 2)]);
302    }
303}