1use std::io::{self, Write};
10
11const BYTE_HASH: [u32; 256] = [
13 0x12bd9527, 0xf4140cea, 0x987bd6e1, 0x79079850, 0xafbfd539, 0xd350ce0a, 0x82973931, 0x9fc32b9c,
14 0x28003b88, 0xc30c13aa, 0x6b678c34, 0x5844ef1d, 0xaa552c18, 0x4a77d3e8, 0xd1f62ea0, 0x6599417c,
15 0xfbe30e7a, 0xf9e2d5ee, 0xa1fca42e, 0x41548969, 0x116d5b59, 0xaeda1e1a, 0xc5191c17, 0x54b9a3cb,
16 0x727e492a, 0x5c432f91, 0x31a50bce, 0xc2696af6, 0x217c8020, 0x1262aefc, 0xace75924, 0x9876a04f,
17 0xaf300bc2, 0x3ffce3f6, 0xd6680fb5, 0xd0b1ced8, 0x6651f842, 0x736fadef, 0xbc2d3429, 0xb03d2904,
18 0x7e634ba4, 0xdfd87d8c, 0x7988d63a, 0x4be4d933, 0x6a8d0382, 0x9e132d62, 0x3ee9c95f, 0xfec05b97,
19 0x6907ad34, 0x8616cfcc, 0xa6aabf24, 0x8ad1c92e, 0x4f2affc0, 0xb87519db, 0x6576eaf6, 0x15dbe00a,
20 0x63e1dd82, 0xa36b6a81, 0xeead99b3, 0xbc6a4309, 0x3478d1a7, 0x2182bcc0, 0xdd50cfce, 0x7cb25580,
21 0x73075483, 0x503b7f42, 0x4cd50d63, 0x3f4d94c9, 0x385fcbb7, 0x90daf16c, 0xece10b8e, 0x11c1cb04,
22 0x816a899b, 0x69a29d06, 0xfb090b37, 0xf98ef13c, 0x07653435, 0x9f15dc42, 0x3b43abdf, 0x1334283f,
23 0x93f3d9af, 0x0cbdfe71, 0xa788a614, 0x4f54d2f0, 0xd4374fc7, 0x70557ce7, 0xf741fce8, 0xe4b6f661,
24 0xc630cb98, 0x387a6366, 0x72f428fd, 0x539009db, 0xc53e3810, 0x1e1a52e5, 0x7d6816b0, 0x040f9b81,
25 0x9c99c9fb, 0x9f3af3d2, 0x774d1061, 0xd5c840ea, 0x8e1480fe, 0x6ee4023c, 0x2fbda535, 0xd88eff7a,
26 0xd8632a2a, 0x43c4e024, 0x3ef27971, 0xc72866fd, 0xe35cc630, 0x46d96220, 0x437a8384, 0xe92caf0c,
27 0x6290a47e, 0xa7bb9238, 0x0e1000f9, 0x49e76bdc, 0x3acfb4b8, 0x03582b8e, 0x6ea2de4e, 0x2ec1008d,
28 0xfcc8df69, 0x91c2fe0a, 0xb471c7d9, 0x778be812, 0x70d29ad1, 0x76411cbf, 0xc302e81c, 0x4e445194,
29 0x22e3aa72, 0xb65762e9, 0xa280db05, 0x827aa70e, 0x4c531a9d, 0x7a60bf4a, 0x8fd95a44, 0x2289aef0,
30 0xcd50ddc4, 0x639aae69, 0x5fe85ed6, 0x4ed724ff, 0x00f04f7d, 0x95a5fcb0, 0x88255d15, 0xa603d2c9,
31 0xf6956a5b, 0x53ea7f3e, 0xb570f225, 0x2b3be203, 0xa181e40e, 0xc413cdce, 0xa7cb1ebb, 0xcf258b1f,
32 0x516eb016, 0xca204586, 0xd1e69894, 0xe85a73d3, 0x7db2d382, 0xae73b463, 0x3598d643, 0x5087c864,
33 0xd91f30b6, 0xe1d4d1e7, 0x73b3b337, 0xceac1233, 0x8edf7845, 0xa69c45c9, 0xdb5db3ab, 0x28cfade8,
34 0xebfa49e7, 0xcbc2a659, 0x59cce971, 0x959a01af, 0x8ee9aae7, 0xfb2f01c6, 0x5a752836, 0x9ed12981,
35 0x618d05b6, 0x93ec12b3, 0x4590c779, 0xed1317a2, 0x03fe5835, 0x7ad3c6f7, 0xd4aad5b5, 0x1a995ed7,
36 0x247bfaa4, 0x69c2c799, 0x745fa405, 0xc5b9f239, 0xc3d9aebc, 0xa6f60e0b, 0xdf1e91d7, 0xab8e041c,
37 0xee3188c6, 0x37377a9e, 0xc0e1a3bf, 0x19a5a9e4, 0x56cb9556, 0xc4d33d3f, 0xfb1eb03e, 0xf9557057,
38 0x1be31d37, 0xd1fa65f1, 0xf518d714, 0x570ac722, 0xf26cf66a, 0x24794d47, 0x8ba2e402, 0x3f5137e6,
39 0x35be1453, 0x43350478, 0x9f05ee88, 0x364cf9cf, 0x39a23ee7, 0xa4db8d49, 0xc2ebb3d2, 0xc6fb99d5,
40 0xe014dfb0, 0x7156d425, 0xe090a87a, 0x4cc12f78, 0x1b30f503, 0x06694a7a, 0x68198cd1, 0x2f8345bd,
41 0x9d79198e, 0xd871943f, 0x22ef6cf4, 0xe81b1c15, 0x067b61d8, 0xfc4ea4f5, 0xfe6dab57, 0x1bf744ba,
42 0xa70b6a25, 0xafe6e412, 0xc6c1a05c, 0x8ffbe3ce, 0xc4270af1, 0xf3f36373, 0xc4507dd8, 0x5e6fd1e2,
43 0x58cd9739, 0x47d3c5b5, 0xe1d5a343, 0x3d4dea4a, 0x893d91ae, 0xbb2a5e2a, 0x0d57b800, 0x652a7cc9,
44 0x6a68ccfd, 0x62529f0b, 0xec5f36d6, 0x766cceda, 0x96ca63ef, 0xa0499838, 0xd9030f59, 0x8185f4d2,
45];
46
47pub struct BuzHash {
49 state: u32,
50 buf: Vec<u8>,
51 n: u32,
52 bshiftn: u32,
53 bshiftm: u32,
54 bufpos: usize,
55 overflow: bool,
56}
57impl BuzHash {
58 pub fn new(n: u32) -> Self {
59 let bshiftn = n % 32;
60 let bshiftm = 32 - bshiftn;
61 Self {
62 state: 0,
63 buf: vec![0u8; n as usize],
64 n,
65 bshiftn,
66 bshiftm,
67 bufpos: 0,
68 overflow: false,
69 }
70 }
71 pub fn hash_byte(&mut self, b: u8) -> u32 {
73 if self.bufpos == self.n as usize {
74 self.overflow = true;
75 self.bufpos = 0;
76 }
77
78 let mut state = self.state;
79 state = (state << 1) | (state >> 31);
80
81 if self.overflow {
82 let toshift = BYTE_HASH[self.buf[self.bufpos] as usize];
83 state ^= toshift << self.bshiftn | {
89 let (i, overflow) = toshift.overflowing_shr(self.bshiftm);
90 if overflow {
91 0
92 } else {
93 i
94 }
95 }
96 }
97
98 self.buf[self.bufpos] = b;
99 self.bufpos += 1;
100
101 state ^= BYTE_HASH[b as usize];
102
103 self.state = state;
104 state
105 }
106 pub fn sum32(&self) -> u32 {
107 self.state
108 }
109 pub fn reset(&mut self) {
110 self.state = 0;
111 self.bufpos = 0;
112 self.overflow = false;
113 }
114}
115impl Write for BuzHash {
116 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
117 for &b in buf {
118 let _ = self.hash_byte(b);
119 }
120 Ok(buf.len())
121 }
122 fn flush(&mut self) -> io::Result<()> {
123 Ok(())
124 }
125}
126
127#[cfg(test)]
128pub mod test {
129 use super::*;
130
131 const LOREMIPSUM1: &str = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
132Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et
133magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis,
134ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis
135enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In
136enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis
137eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum
138semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu,
139consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra
140quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet.
141Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur
142ullamcorper ultricies nisi. Nam eget dui.
143Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper
144libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel,
145luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt
146tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam
147sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit
148amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum
149sodales, augue velit cursus nunc, quis gravida magna mi a libero. Fusce
150vulputate eleifend sapien. Vestibulum purus quam, scelerisque ut, mollis sed,
151nonummy id, metus. Nullam accumsan lorem in dui. Cras ultricies mi eu turpis
152hendrerit fringilla. Vestibulum ante ipsum primis in faucibus orci luctus et
153ultrices posuere cubilia Curae; In ac dui quis mi consectetuer lacinia.";
154
155 const LOREMIPSUM2: &str = "Nam pretium turpis et arcu. Duis arcu tortor, suscipit eget,
156imperdiet nec, imperdiet iaculis, ipsum. Sed aliquam ultrices mauris. Integer
157ante arcu, accumsan a, consectetuer eget, posuere ut, mauris. Praesent
158adipiscing. Phasellus ullamcorper ipsum rutrum nunc. Nunc nonummy metus.
159Vestibulum volutpat pretium libero. Cras id dui. Aenean ut eros et nisl sagittis
160vestibulum. Nullam nulla eros, ultricies sit amet, nonummy id, imperdiet
161feugiat, pede. Sed lectus. Donec mollis hendrerit risus. Phasellus nec sem in
162justo pellentesque facilisis. Etiam imperdiet imperdiet orci. Nunc nec neque.
163Phasellus leo dolor, tempus non, auctor et, hendrerit quis, nisi.
164Curabitur ligula sapien, tincidunt non, euismod vitae, posuere imperdiet, leo.
165Maecenas malesuada. Praesent congue erat at massa. Sed cursus turpis vitae
166tortor. Donec posuere vulputate arcu. Phasellus accumsan cursus velit.
167Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia
168Curae; Sed aliquam, nisi quis porttitor congue, elit erat euismod orci, ac
169placerat dolor lectus quis orci. Phasellus consectetuer vestibulum elit. Aenean
170tellus metus, bibendum sed, posuere ac, mattis non, nunc. Vestibulum fringilla
171pede sit amet augue. In turpis. Pellentesque posuere. Praesent turpis.";
172
173 #[test]
174 fn rolling_hash() {
175 let phrase1 = "Aenean massa. Cum sociis natoque";
176 let phrase2 = "Phasellus leo dolor, tempus non, auctor et, hendrerit quis, nisi";
177
178 let mut hasher1 = BuzHash::new(32);
179 hasher1.write_all(phrase1.as_bytes()).unwrap();
180 let p1sum = hasher1.sum32();
181 hasher1.reset();
182 let mut found = false;
183 for (idx, &b) in LOREMIPSUM1.as_bytes().iter().enumerate() {
184 let ssum = hasher1.hash_byte(b);
185 if (ssum == p1sum) && (idx - 32 == 91) {
186 found = true;
187 break;
188 }
189 }
190 assert!(found, "phrase1 sum not found within loremipsum1");
191
192 hasher1.write_all(phrase2.as_bytes()).unwrap();
193 let p2sum = hasher1.sum32();
194 hasher1.reset();
195 let mut found = false;
196 for (idx, &b) in LOREMIPSUM2.as_bytes().iter().enumerate() {
197 let ssum = hasher1.hash_byte(b);
198 if (ssum == p2sum) && (idx - 64 == 592) {
199 found = true;
200 break;
201 }
202 }
203 assert!(found, "phrase2 sum not found within loremipsum2");
204 }
205}