tab_hash/
lib.rs

1//! This crate offers rust implementations of simple and twisted tabulation hashing for 32-bit integer values.
2//!
3//! Instatiating `Tab32Simple` or `Tab32Twisted` will initialize a table and
4//! create a random hash function from the respective hash family.
5//! The hash value of a 32-bit integer can be computed by calling its `hash` method.
6//!
7//! # Example:
8//!
9//! ```rust
10//! use tab_hash::Tab32Simple;
11//!
12//! fn main() {
13//!     let keys = vec![0, 8, 15, 47, 11];
14//!     let simple = Tab32Simple::new();
15//!     for k in keys {
16//!         println!("{}", simple.hash(k));
17//!     }
18//! }
19//! ```
20//!
21//! To reprocude hashes, save the table used by the hash function and save it.
22//! The function can be recreated using the `with_table` constructor.
23//!
24//! ```rust
25//! use tab_hash::Tab32Twisted;
26//!
27//! fn main() {
28//!     let key = 42;
29//!     let twisted_1 = Tab32Twisted::new();
30//!     let twisted_2 = Tab32Twisted::with_table(twisted_1.get_table());
31//!     let twisted_3 = Tab32Twisted::new();
32//!     assert_eq!(twisted_1.hash(key), twisted_2.hash(key));
33//!     assert_ne!(twisted_1.hash(key), twisted_3.hash(key));
34//! }
35//! ```
36//!
37//! # Note:
38//! These hash functions do not implement the `std::hash::Hasher` trait,
39//! since they do not work on arbitrary length byte streams.
40//!
41//! # Literature:
42//! This implementation is based on the articles of Mihai Patrascu and Mikkel Thorup:
43//! - [Simple Tabulation Hashing](http://dx.doi.org/10.1145/1993636.1993638)
44//! - [Twisted Tabulation Hashing](https://doi.org/10.1137/1.9781611973105.16)
45use rand;
46use serde;
47use serde::{Deserialize, Deserializer, Serialize, Serializer};
48
49/// Split up a 32bit number into 8bit chunks
50fn byte_chunks_32(x: u32) -> [u8; 4] {
51    [
52        (x & 0x0000_00FF) as u8,
53        ((x & 0x0000_FF00) >> 8) as u8,
54        ((x & 0x00FF_0000) >> 16) as u8,
55        ((x & 0xFF00_0000) >> 24) as u8,
56    ]
57}
58
59/// Split up a 64bit number into 8bit chunks
60fn byte_chunks_64(x: u64) -> [u8; 8] {
61    [
62        (x & 0x0000_0000_0000_00FF) as u8,
63        ((x & 0x0000_0000_0000_FF00) >> 8) as u8,
64        ((x & 0x0000_0000_00FF_0000) >> 16) as u8,
65        ((x & 0x0000_0000_FF00_0000) >> 24) as u8,
66        ((x & 0x0000_00FF_0000_0000) >> 32) as u8,
67        ((x & 0x0000_FF00_0000_0000) >> 40) as u8,
68        ((x & 0x00FF_0000_0000_0000) >> 48) as u8,
69        ((x & 0xFF00_0000_0000_0000) >> 56) as u8,
70    ]
71}
72
73/// A universal hash function for 32-bit integers using simple tabulation.
74///
75/// Usage:
76/// ```rust
77/// use tab_hash::Tab32Simple;
78///
79/// fn main() {
80///     let keys = vec![0, 8, 15, 47, 11];
81///     let simple = Tab32Simple::new();
82///     for k in keys {
83///         println!("{}", simple.hash(k));
84///     }
85/// }
86/// ```
87#[derive(Clone, Deserialize)]
88pub struct Tab32Simple {
89    #[serde(deserialize_with = "tab32simple_from_vec")]
90    table: [[u32; 256]; 4],
91}
92
93impl Tab32Simple {
94    /// Create a new simple tabulation hash function with a random table.
95    pub fn new() -> Self {
96        Tab32Simple {
97            table: Tab32Simple::initialize_table(),
98        }
99    }
100
101    /// Create a new simple tabulation hash function with a random table.
102    pub fn to_vec(&self) -> Vec<Vec<u32>> {
103        let mut vec = Vec::with_capacity(4);
104        for col in self.table.iter() {
105            vec.push(col.to_vec());
106        }
107        vec
108    }
109
110    /// Create a new simple tabulation hash function with a random table.
111    pub fn from_vec(table_data: Vec<Vec<u32>>) -> Self {
112        let mut table = [[0_u32; 256]; 4];
113        assert_eq!(table_data.len(), 4);
114        for (i, column) in table_data.iter().enumerate() {
115            assert_eq!(column.len(), 256);
116            for (j, value) in column.iter().enumerate() {
117                table[i][j] = *value;
118            }
119        }
120        Tab32Simple { table }
121    }
122
123    /// Create a new simple tabulation hash function with a given table.
124    pub fn with_table(table: [[u32; 256]; 4]) -> Self {
125        Tab32Simple { table }
126    }
127
128    /// Generate a table of 32bit uints for simple tabulation hashing
129    fn initialize_table() -> [[u32; 256]; 4] {
130        let table: [[u32; 256]; 4] =
131            array_init::array_init(|_| array_init::array_init(|_| rand::random()));
132        table
133    }
134
135    /// Get the table used by this hash function.
136    pub fn get_table(&self) -> [[u32; 256]; 4] {
137        self.table
138    }
139
140    /// Compute simple tabulation hash value for a 32bit integer number.
141    pub fn hash(&self, x: u32) -> u32 {
142        let mut h: u32 = 0; // initialize hash values as 0
143
144        for (i, c) in byte_chunks_32(x).iter().enumerate() {
145            h ^= self.table[i as usize][*c as usize];
146        }
147        h
148    }
149}
150
151/// Custom serialization converting nested array to a nested vec (cannot be derived)
152fn tab32simple_from_vec<'de, D>(deserializer: D) -> Result<[[u32; 256]; 4], D::Error>
153where
154    D: Deserializer<'de>,
155{
156    let table_data: Vec<Vec<u32>> = Deserialize::deserialize(deserializer)?;
157
158    let mut table = [[0_u32; 256]; 4];
159    assert_eq!(table_data.len(), 4);
160    for (i, column) in table_data.iter().enumerate() {
161        assert_eq!(column.len(), 256);
162        for (j, value) in column.iter().enumerate() {
163            table[i][j] = *value;
164        }
165    }
166    Ok(table)
167}
168
169#[derive(Clone, Serialize)]
170struct _VecTab32Simple {
171    table: Vec<Vec<u32>>,
172}
173
174impl Serialize for Tab32Simple {
175    fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
176    where
177        S: Serializer,
178    {
179        _VecTab32Simple {
180            table: self.to_vec(),
181        }
182        .serialize(s)
183    }
184}
185
186/// A universal hash function for 64-bit integers using simple tabulation.
187///
188/// Usage:
189/// ```rust
190/// use tab_hash::Tab64Simple;
191///
192/// fn main() {
193///     let keys = vec![0, 8, 15, 47, 11];
194///     let simple = Tab64Simple::new();
195///     for k in keys {
196///         println!("{}", simple.hash(k));
197///     }
198/// }
199/// ```
200#[derive(Clone, Deserialize)]
201pub struct Tab64Simple {
202    #[serde(deserialize_with = "tab64simple_from_vec")]
203    table: [[u64; 256]; 8],
204}
205
206impl Tab64Simple {
207    /// Create a new simple tabulation hash function with a random table.
208    pub fn new() -> Self {
209        Tab64Simple {
210            table: Tab64Simple::initialize_table(),
211        }
212    }
213
214    /// Create a new simple tabulation hash function with a random table.
215    pub fn to_vec(&self) -> Vec<Vec<u64>> {
216        let mut vec = Vec::with_capacity(8);
217        for col in self.table.iter() {
218            vec.push(col.to_vec());
219        }
220        vec
221    }
222
223    /// Create a new simple tabulation hash function with a random table.
224    pub fn from_vec(table_data: Vec<Vec<u64>>) -> Self {
225        let mut table = [[0_u64; 256]; 8];
226        assert_eq!(table_data.len(), 8);
227        for (i, column) in table_data.iter().enumerate() {
228            assert_eq!(column.len(), 256);
229            for (j, value) in column.iter().enumerate() {
230                table[i][j] = *value;
231            }
232        }
233        Tab64Simple { table }
234    }
235
236    /// Create a new simple tabulation hash function with a given table.
237    pub fn with_table(table: [[u64; 256]; 8]) -> Self {
238        Tab64Simple { table }
239    }
240
241    /// Generate a table of 64bit uints for simple tabulation hashing
242    fn initialize_table() -> [[u64; 256]; 8] {
243        let table: [[u64; 256]; 8] =
244            array_init::array_init(|_| array_init::array_init(|_| rand::random()));
245        table
246    }
247
248    /// Get the table used by this hash function.
249    pub fn get_table(&self) -> [[u64; 256]; 8] {
250        self.table
251    }
252
253    /// Compute simple tabulation hash value for a 64bit integer number.
254    pub fn hash(&self, x: u64) -> u64 {
255        let mut h: u64 = 0; // initialize hash values as 0
256
257        for (i, c) in byte_chunks_64(x).iter().enumerate() {
258            h ^= self.table[i as usize][*c as usize];
259        }
260        h
261    }
262}
263
264/// Custom serialization converting nested array to a nested vec (cannot be derived)
265fn tab64simple_from_vec<'de, D>(deserializer: D) -> Result<[[u64; 256]; 8], D::Error>
266where
267    D: Deserializer<'de>,
268{
269    let table_data: Vec<Vec<u64>> = Deserialize::deserialize(deserializer)?;
270
271    let mut table = [[0_u64; 256]; 8];
272    assert_eq!(table_data.len(), 8);
273    for (i, column) in table_data.iter().enumerate() {
274        assert_eq!(column.len(), 256);
275        for (j, value) in column.iter().enumerate() {
276            table[i][j] = *value;
277        }
278    }
279    Ok(table)
280}
281
282#[derive(Clone, Serialize)]
283struct _VecTab64Simple {
284    table: Vec<Vec<u64>>,
285}
286
287impl Serialize for Tab64Simple {
288    fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
289    where
290        S: Serializer,
291    {
292        _VecTab64Simple {
293            table: self.to_vec(),
294        }
295        .serialize(s)
296    }
297}
298
299/// A universal hash function for 32-bit integers using twisted tabulation.
300///
301/// Usage:
302/// ```rust
303/// use tab_hash::Tab32Twisted;
304///
305/// fn main() {
306///     let keys = vec![0, 8, 15, 47, 11];
307///     let twisted = Tab32Twisted::new();
308///     for k in keys {
309///         println!("{}", twisted.hash(k));
310///     }
311/// }
312/// ```
313#[derive(Clone, Deserialize)]
314pub struct Tab32Twisted {
315    #[serde(deserialize_with = "tab32twisted_from_vec")]
316    table: [[u64; 256]; 4],
317}
318
319impl Tab32Twisted {
320    /// Create a new twisted tabulation hash function with a random table.
321    pub fn new() -> Self {
322        Tab32Twisted {
323            table: Tab32Twisted::initialize_table(),
324        }
325    }
326
327    /// Create a new simple tabulation hash function with a random table.
328    pub fn to_vec(&self) -> Vec<Vec<u64>> {
329        let mut vec = Vec::with_capacity(4);
330        for col in self.table.iter() {
331            vec.push(col.to_vec());
332        }
333        vec
334    }
335
336    /// Create a new simple tabulation hash function with a random table.
337    pub fn from_vec(table_data: Vec<Vec<u64>>) -> Self {
338        let mut table = [[0_u64; 256]; 4];
339        assert_eq!(table_data.len(), 4);
340        for (i, column) in table_data.iter().enumerate() {
341            assert_eq!(column.len(), 256);
342            for (j, value) in column.iter().enumerate() {
343                table[i][j] = *value;
344            }
345        }
346        Tab32Twisted { table }
347    }
348
349    /// Create a new twisted tabulation hash function with a given table.
350    pub fn with_table(table: [[u64; 256]; 4]) -> Self {
351        Tab32Twisted { table }
352    }
353
354    /// Generate a table of 64bit uints for twisted tabulation hashing
355    fn initialize_table() -> [[u64; 256]; 4] {
356        let table: [[u64; 256]; 4] =
357            array_init::array_init(|_| array_init::array_init(|_| rand::random()));
358        table
359    }
360
361    /// Get the table used by this hash function.
362    pub fn get_table(&self) -> [[u64; 256]; 4] {
363        self.table
364    }
365
366    /// Compute twisted tabulation hash value for a 32bit integer number.
367    pub fn hash(&self, x: u32) -> u32 {
368        let mut h: u64 = 0; // initialize hash values as 0
369        let chunks = byte_chunks_32(x);
370        for (i, c) in chunks[0..3].iter().enumerate() {
371            h ^= self.table[i as usize][*c as usize];
372        }
373        // compute address for last chunk by XOring the lowest byte of the
374        // current hash value with the content of the last chunk of the key
375        let c = chunks[3] ^ (h & 0xFF) as u8;
376        h ^= self.table[3][c as usize];
377        // shift out the 32 low bits of the resulting hash
378        h = h.overflowing_shr(32).0;
379
380        h as u32
381    }
382}
383
384/// Custom serialization converting nested array to a nested vec (cannot be derived)
385fn tab32twisted_from_vec<'de, D>(deserializer: D) -> Result<[[u64; 256]; 4], D::Error>
386where
387    D: Deserializer<'de>,
388{
389    let table_data: Vec<Vec<u64>> = Deserialize::deserialize(deserializer)?;
390
391    let mut table = [[0_u64; 256]; 4];
392    assert_eq!(table_data.len(), 4);
393    for (i, column) in table_data.iter().enumerate() {
394        assert_eq!(column.len(), 256);
395        for (j, value) in column.iter().enumerate() {
396            table[i][j] = *value;
397        }
398    }
399    Ok(table)
400}
401
402#[derive(Clone, Serialize)]
403struct _VecTab32Twisted {
404    table: Vec<Vec<u64>>,
405}
406
407impl Serialize for Tab32Twisted {
408    fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
409    where
410        S: Serializer,
411    {
412        _VecTab32Twisted {
413            table: self.to_vec(),
414        }
415        .serialize(s)
416    }
417}
418
419/// A universal hash function for 64-bit integers using twisted tabulation.
420///
421/// Usage:
422/// ```rust
423/// use tab_hash::Tab64Twisted;
424///
425/// fn main() {
426///     let keys = vec![0, 8, 15, 47, 11];
427///     let twisted = Tab64Twisted::new();
428///     for k in keys {
429///         println!("{}", twisted.hash(k));
430///     }
431/// }
432/// ```
433#[derive(Clone, Deserialize)]
434pub struct Tab64Twisted {
435    #[serde(deserialize_with = "tab64twisted_from_vec")]
436    table: [[u128; 256]; 8],
437}
438
439impl Tab64Twisted {
440    /// Create a new twisted tabulation hash function with a random table.
441    pub fn new() -> Self {
442        Tab64Twisted {
443            table: Tab64Twisted::initialize_table(),
444        }
445    }
446
447    /// Create a new simple tabulation hash function with a random table.
448    pub fn to_vec(&self) -> Vec<Vec<u128>> {
449        let mut vec = Vec::with_capacity(8);
450        for col in self.table.iter() {
451            vec.push(col.to_vec());
452        }
453        vec
454    }
455
456    /// Create a new simple tabulation hash function with a random table.
457    pub fn from_vec(table_data: Vec<Vec<u128>>) -> Self {
458        let mut table = [[0_u128; 256]; 8];
459        assert_eq!(table_data.len(), 8);
460        for (i, column) in table_data.iter().enumerate() {
461            assert_eq!(column.len(), 256);
462            for (j, value) in column.iter().enumerate() {
463                table[i][j] = *value;
464            }
465        }
466        Tab64Twisted { table }
467    }
468
469    /// Create a new twisted tabulation hash function with a given table.
470    pub fn with_table(table: [[u128; 256]; 8]) -> Self {
471        Tab64Twisted { table }
472    }
473
474    /// Generate a table of 128bit uints for twisted tabulation hashing
475    fn initialize_table() -> [[u128; 256]; 8] {
476        let table: [[u128; 256]; 8] =
477            array_init::array_init(|_| array_init::array_init(|_| rand::random()));
478        table
479    }
480
481    /// Get the table used by this hash function.
482    pub fn get_table(&self) -> [[u128; 256]; 8] {
483        self.table
484    }
485
486    /// Compute twisted tabulation hash value for a 64bit integer number.
487    pub fn hash(&self, x: u64) -> u64 {
488        let mut h: u128 = 0; // initialize hash values as 0
489        let chunks = byte_chunks_64(x);
490        for (i, c) in chunks[0..7].iter().enumerate() {
491            h ^= self.table[i as usize][*c as usize];
492        }
493        // compute address for last chunk by XOring the lowest byte of the
494        // current hash value with the content of the last chunk of the key
495        let c = chunks[7] ^ (h & 0xFF) as u8;
496        h ^= self.table[7][c as usize];
497        // shift out the 64 low bits of the resulting hash
498        h = h.overflowing_shr(64).0;
499
500        h as u64
501    }
502}
503
504/// Custom serialization converting nested array to a nested vec (cannot be derived)
505fn tab64twisted_from_vec<'de, D>(deserializer: D) -> Result<[[u128; 256]; 8], D::Error>
506where
507    D: Deserializer<'de>,
508{
509    let table_data: Vec<Vec<u128>> = Deserialize::deserialize(deserializer)?;
510
511    let mut table = [[0_u128; 256]; 8];
512    assert_eq!(table_data.len(), 8);
513    for (i, column) in table_data.iter().enumerate() {
514        assert_eq!(column.len(), 256);
515        for (j, value) in column.iter().enumerate() {
516            table[i][j] = *value;
517        }
518    }
519    Ok(table)
520}
521
522#[derive(Clone, Serialize)]
523struct _VecTab64Twisted {
524    table: Vec<Vec<u128>>,
525}
526
527impl Serialize for Tab64Twisted {
528    fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
529    where
530        S: Serializer,
531    {
532        _VecTab64Twisted {
533            table: self.to_vec(),
534        }
535        .serialize(s)
536    }
537}
538
539// Tests for private methods
540#[test]
541fn byte_chunking_32() {
542    let random_bytes: [u8; 400] = array_init::array_init(|_| rand::random());
543    for four_bytes in random_bytes.chunks(4) {
544        let mut number = 0_u32;
545        for byte in four_bytes.iter().rev() {
546            number = (number << 8) | *byte as u32;
547        }
548        assert_eq!(four_bytes, byte_chunks_32(number));
549    }
550}
551
552#[test]
553fn byte_chunking_64() {
554    let random_bytes: [u8; 480] = array_init::array_init(|_| rand::random());
555    for four_bytes in random_bytes.chunks(8) {
556        let mut number = 0_u64;
557        for byte in four_bytes.iter().rev() {
558            number = (number << 8) | *byte as u64;
559        }
560        assert_eq!(four_bytes, byte_chunks_64(number));
561    }
562}