fbuzhash/
lib.rs

1//! A (partial) Rust implementation of a Go BuzHash, with the bare minimum of modifications.
2//!
3//! Source reference and credit goes to: https://github.com/silvasur/buzhash
4//!
5//! ## License
6//!
7//! The reference implementation has a [LICENSE](https://github.com/silvasur/buzhash/blob/master/LICENSE)
8//! that i'm not exactly sure what to do with.
9use std::io::{self, Write};
10
11/// 256 random uint32's to hash a single byte.
12const BYTE_HASH: [u32; 256] = [
13    0x12bd9527, 0xf4140cea, 0x987bd6e1, 0x79079850, 0xafbfd539, 0xd350ce0a, 0x82973931, 0x9fc32b9c,
14    0x28003b88, 0xc30c13aa, 0x6b678c34, 0x5844ef1d, 0xaa552c18, 0x4a77d3e8, 0xd1f62ea0, 0x6599417c,
15    0xfbe30e7a, 0xf9e2d5ee, 0xa1fca42e, 0x41548969, 0x116d5b59, 0xaeda1e1a, 0xc5191c17, 0x54b9a3cb,
16    0x727e492a, 0x5c432f91, 0x31a50bce, 0xc2696af6, 0x217c8020, 0x1262aefc, 0xace75924, 0x9876a04f,
17    0xaf300bc2, 0x3ffce3f6, 0xd6680fb5, 0xd0b1ced8, 0x6651f842, 0x736fadef, 0xbc2d3429, 0xb03d2904,
18    0x7e634ba4, 0xdfd87d8c, 0x7988d63a, 0x4be4d933, 0x6a8d0382, 0x9e132d62, 0x3ee9c95f, 0xfec05b97,
19    0x6907ad34, 0x8616cfcc, 0xa6aabf24, 0x8ad1c92e, 0x4f2affc0, 0xb87519db, 0x6576eaf6, 0x15dbe00a,
20    0x63e1dd82, 0xa36b6a81, 0xeead99b3, 0xbc6a4309, 0x3478d1a7, 0x2182bcc0, 0xdd50cfce, 0x7cb25580,
21    0x73075483, 0x503b7f42, 0x4cd50d63, 0x3f4d94c9, 0x385fcbb7, 0x90daf16c, 0xece10b8e, 0x11c1cb04,
22    0x816a899b, 0x69a29d06, 0xfb090b37, 0xf98ef13c, 0x07653435, 0x9f15dc42, 0x3b43abdf, 0x1334283f,
23    0x93f3d9af, 0x0cbdfe71, 0xa788a614, 0x4f54d2f0, 0xd4374fc7, 0x70557ce7, 0xf741fce8, 0xe4b6f661,
24    0xc630cb98, 0x387a6366, 0x72f428fd, 0x539009db, 0xc53e3810, 0x1e1a52e5, 0x7d6816b0, 0x040f9b81,
25    0x9c99c9fb, 0x9f3af3d2, 0x774d1061, 0xd5c840ea, 0x8e1480fe, 0x6ee4023c, 0x2fbda535, 0xd88eff7a,
26    0xd8632a2a, 0x43c4e024, 0x3ef27971, 0xc72866fd, 0xe35cc630, 0x46d96220, 0x437a8384, 0xe92caf0c,
27    0x6290a47e, 0xa7bb9238, 0x0e1000f9, 0x49e76bdc, 0x3acfb4b8, 0x03582b8e, 0x6ea2de4e, 0x2ec1008d,
28    0xfcc8df69, 0x91c2fe0a, 0xb471c7d9, 0x778be812, 0x70d29ad1, 0x76411cbf, 0xc302e81c, 0x4e445194,
29    0x22e3aa72, 0xb65762e9, 0xa280db05, 0x827aa70e, 0x4c531a9d, 0x7a60bf4a, 0x8fd95a44, 0x2289aef0,
30    0xcd50ddc4, 0x639aae69, 0x5fe85ed6, 0x4ed724ff, 0x00f04f7d, 0x95a5fcb0, 0x88255d15, 0xa603d2c9,
31    0xf6956a5b, 0x53ea7f3e, 0xb570f225, 0x2b3be203, 0xa181e40e, 0xc413cdce, 0xa7cb1ebb, 0xcf258b1f,
32    0x516eb016, 0xca204586, 0xd1e69894, 0xe85a73d3, 0x7db2d382, 0xae73b463, 0x3598d643, 0x5087c864,
33    0xd91f30b6, 0xe1d4d1e7, 0x73b3b337, 0xceac1233, 0x8edf7845, 0xa69c45c9, 0xdb5db3ab, 0x28cfade8,
34    0xebfa49e7, 0xcbc2a659, 0x59cce971, 0x959a01af, 0x8ee9aae7, 0xfb2f01c6, 0x5a752836, 0x9ed12981,
35    0x618d05b6, 0x93ec12b3, 0x4590c779, 0xed1317a2, 0x03fe5835, 0x7ad3c6f7, 0xd4aad5b5, 0x1a995ed7,
36    0x247bfaa4, 0x69c2c799, 0x745fa405, 0xc5b9f239, 0xc3d9aebc, 0xa6f60e0b, 0xdf1e91d7, 0xab8e041c,
37    0xee3188c6, 0x37377a9e, 0xc0e1a3bf, 0x19a5a9e4, 0x56cb9556, 0xc4d33d3f, 0xfb1eb03e, 0xf9557057,
38    0x1be31d37, 0xd1fa65f1, 0xf518d714, 0x570ac722, 0xf26cf66a, 0x24794d47, 0x8ba2e402, 0x3f5137e6,
39    0x35be1453, 0x43350478, 0x9f05ee88, 0x364cf9cf, 0x39a23ee7, 0xa4db8d49, 0xc2ebb3d2, 0xc6fb99d5,
40    0xe014dfb0, 0x7156d425, 0xe090a87a, 0x4cc12f78, 0x1b30f503, 0x06694a7a, 0x68198cd1, 0x2f8345bd,
41    0x9d79198e, 0xd871943f, 0x22ef6cf4, 0xe81b1c15, 0x067b61d8, 0xfc4ea4f5, 0xfe6dab57, 0x1bf744ba,
42    0xa70b6a25, 0xafe6e412, 0xc6c1a05c, 0x8ffbe3ce, 0xc4270af1, 0xf3f36373, 0xc4507dd8, 0x5e6fd1e2,
43    0x58cd9739, 0x47d3c5b5, 0xe1d5a343, 0x3d4dea4a, 0x893d91ae, 0xbb2a5e2a, 0x0d57b800, 0x652a7cc9,
44    0x6a68ccfd, 0x62529f0b, 0xec5f36d6, 0x766cceda, 0x96ca63ef, 0xa0499838, 0xd9030f59, 0x8185f4d2,
45];
46
47/// BuzHash implements the hash.Hash32 interface and also has a function to write a single byte.
48pub struct BuzHash {
49    state: u32,
50    buf: Vec<u8>,
51    n: u32,
52    bshiftn: u32,
53    bshiftm: u32,
54    bufpos: usize,
55    overflow: bool,
56}
57impl BuzHash {
58    pub fn new(n: u32) -> Self {
59        let bshiftn = n % 32;
60        let bshiftm = 32 - bshiftn;
61        Self {
62            state: 0,
63            buf: vec![0u8; n as usize],
64            n,
65            bshiftn,
66            bshiftm,
67            bufpos: 0,
68            overflow: false,
69        }
70    }
71    /// HashByte updates the hash with a single byte and returns the resulting sum
72    pub fn hash_byte(&mut self, b: u8) -> u32 {
73        if self.bufpos == self.n as usize {
74            self.overflow = true;
75            self.bufpos = 0;
76        }
77
78        let mut state = self.state;
79        state = (state << 1) | (state >> 31);
80
81        if self.overflow {
82            let toshift = BYTE_HASH[self.buf[self.bufpos] as usize];
83            // I believe the assumption here is that the toshift always comes from
84            // BYTE_HASH, and thus left shifting via bshiftn can't overflow if all
85            // the values within BYTE_HASH are safe. Not positive.
86            //
87            // NOTE(leeola): be paranoid about this conversion from Go to Rust.
88            state ^= toshift << self.bshiftn | {
89                let (i, overflow) = toshift.overflowing_shr(self.bshiftm);
90                if overflow {
91                    0
92                } else {
93                    i
94                }
95            }
96        }
97
98        self.buf[self.bufpos] = b;
99        self.bufpos += 1;
100
101        state ^= BYTE_HASH[b as usize];
102
103        self.state = state;
104        state
105    }
106    pub fn sum32(&self) -> u32 {
107        self.state
108    }
109    pub fn reset(&mut self) {
110        self.state = 0;
111        self.bufpos = 0;
112        self.overflow = false;
113    }
114}
115impl Write for BuzHash {
116    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
117        for &b in buf {
118            let _ = self.hash_byte(b);
119        }
120        Ok(buf.len())
121    }
122    fn flush(&mut self) -> io::Result<()> {
123        Ok(())
124    }
125}
126
127#[cfg(test)]
128pub mod test {
129    use super::*;
130
131    const LOREMIPSUM1: &str = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
132Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et
133magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis,
134ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis
135enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In
136enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis
137eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum
138semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu,
139consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra
140quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet.
141Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur
142ullamcorper ultricies nisi. Nam eget dui.
143Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper
144libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel,
145luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt
146tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam
147sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit
148amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum
149sodales, augue velit cursus nunc, quis gravida magna mi a libero. Fusce
150vulputate eleifend sapien. Vestibulum purus quam, scelerisque ut, mollis sed,
151nonummy id, metus. Nullam accumsan lorem in dui. Cras ultricies mi eu turpis
152hendrerit fringilla. Vestibulum ante ipsum primis in faucibus orci luctus et
153ultrices posuere cubilia Curae; In ac dui quis mi consectetuer lacinia.";
154
155    const LOREMIPSUM2: &str = "Nam pretium turpis et arcu. Duis arcu tortor, suscipit eget,
156imperdiet nec, imperdiet iaculis, ipsum. Sed aliquam ultrices mauris. Integer
157ante arcu, accumsan a, consectetuer eget, posuere ut, mauris. Praesent
158adipiscing. Phasellus ullamcorper ipsum rutrum nunc. Nunc nonummy metus.
159Vestibulum volutpat pretium libero. Cras id dui. Aenean ut eros et nisl sagittis
160vestibulum. Nullam nulla eros, ultricies sit amet, nonummy id, imperdiet
161feugiat, pede. Sed lectus. Donec mollis hendrerit risus. Phasellus nec sem in
162justo pellentesque facilisis. Etiam imperdiet imperdiet orci. Nunc nec neque.
163Phasellus leo dolor, tempus non, auctor et, hendrerit quis, nisi.
164Curabitur ligula sapien, tincidunt non, euismod vitae, posuere imperdiet, leo.
165Maecenas malesuada. Praesent congue erat at massa. Sed cursus turpis vitae
166tortor. Donec posuere vulputate arcu. Phasellus accumsan cursus velit.
167Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia
168Curae; Sed aliquam, nisi quis porttitor congue, elit erat euismod orci, ac
169placerat dolor lectus quis orci. Phasellus consectetuer vestibulum elit. Aenean
170tellus metus, bibendum sed, posuere ac, mattis non, nunc. Vestibulum fringilla
171pede sit amet augue. In turpis. Pellentesque posuere. Praesent turpis.";
172
173    #[test]
174    fn rolling_hash() {
175        let phrase1 = "Aenean massa. Cum sociis natoque";
176        let phrase2 = "Phasellus leo dolor, tempus non, auctor et, hendrerit quis, nisi";
177
178        let mut hasher1 = BuzHash::new(32);
179        hasher1.write_all(phrase1.as_bytes()).unwrap();
180        let p1sum = hasher1.sum32();
181        hasher1.reset();
182        let mut found = false;
183        for (idx, &b) in LOREMIPSUM1.as_bytes().iter().enumerate() {
184            let ssum = hasher1.hash_byte(b);
185            if (ssum == p1sum) && (idx - 32 == 91) {
186                found = true;
187                break;
188            }
189        }
190        assert!(found, "phrase1 sum not found within loremipsum1");
191
192        hasher1.write_all(phrase2.as_bytes()).unwrap();
193        let p2sum = hasher1.sum32();
194        hasher1.reset();
195        let mut found = false;
196        for (idx, &b) in LOREMIPSUM2.as_bytes().iter().enumerate() {
197            let ssum = hasher1.hash_byte(b);
198            if (ssum == p2sum) && (idx - 64 == 592) {
199                found = true;
200                break;
201            }
202        }
203        assert!(found, "phrase2 sum not found within loremipsum2");
204    }
205}