Skip to main content

delsum_lib/polyhash/
mod.rs

1use std::{fmt::Display, str::FromStr};
2mod rev;
3pub use rev::reverse_polyhash;
4#[cfg(feature = "parallel")]
5pub use rev::reverse_polyhash_para;
6
7use crate::{
8    bitnum::Modnum,
9    checksum::{parse_hex, CheckBuilderErr, Checksum, Digest, LinearCheck},
10    endian::{Endian, SignedInt, Signedness, WordSpec},
11    keyval::KeyValIter,
12};
13
14#[derive(Debug, Clone)]
15pub struct PolyHashBuilder<S> {
16    width: Option<usize>,
17    factor: Option<S>,
18    init: Option<S>,
19    addout: Option<S>,
20    input_endian: Option<Endian>,
21    output_endian: Option<Endian>,
22    wordsize: Option<usize>,
23    signedness: Option<Signedness>,
24    check: Option<S>,
25    name: Option<String>,
26}
27
28impl<S: Modnum> FromStr for PolyHashBuilder<S> {
29    fn from_str(s: &str) -> Result<PolyHashBuilder<S>, CheckBuilderErr> {
30        let mut sum = PolyHash::<S>::with_options();
31        for x in KeyValIter::new(s) {
32            let (current_key, current_val) = match x {
33                Err(key) => return Err(CheckBuilderErr::MalformedString(key)),
34                Ok(s) => s,
35            };
36            let sum_op = match current_key.as_str() {
37                "width" => usize::from_str(&current_val).ok().map(|x| sum.width(x)),
38                "factor" => Some(sum.factor(parse_hex::<S>(&current_val, "factor")?)),
39                "init" => Some(sum.init(parse_hex::<S>(&current_val, "init")?)),
40                "addout" => Some(sum.addout(parse_hex::<S>(&current_val, "addout")?)),
41                "in_endian" => Endian::from_str(&current_val).ok().map(|x| sum.inendian(x)),
42                "wordsize" => usize::from_str(&current_val).ok().map(|x| sum.wordsize(x)),
43                "out_endian" => Endian::from_str(&current_val)
44                    .ok()
45                    .map(|x| sum.outendian(x)),
46                "signedness" => Signedness::from_str(&current_val)
47                    .ok()
48                    .map(|x| sum.signedness(x)),
49                "name" => Some(sum.name(&current_val)),
50                _ => return Err(CheckBuilderErr::UnknownKey(current_key)),
51            };
52            match sum_op {
53                Some(c) => sum = c.clone(),
54                None => return Err(CheckBuilderErr::MalformedString(current_key)),
55            }
56        }
57        Ok(sum)
58    }
59    type Err = CheckBuilderErr;
60}
61
62impl<S: Modnum> PolyHashBuilder<S> {
63    /// The total width, in bits, of the sum. Mandatory.
64    ///
65    /// The checksum is calculated modulo 2^width.
66    pub fn width(&mut self, w: usize) -> &mut Self {
67        self.width = Some(w);
68        self
69    }
70    /// The factor by which the sum gets multiplied before adding
71    /// the next input byte.
72    pub fn factor(&mut self, w: S) -> &mut Self {
73        self.factor = Some(w);
74        self
75    }
76    /// The initial value of the checksum, default 0.
77    pub fn init(&mut self, i: S) -> &mut Self {
78        self.init = Some(i);
79        self
80    }
81    /// The value that gets added to the checksum at the out.
82    pub fn addout(&mut self, i: S) -> &mut Self {
83        self.addout = Some(i);
84        self
85    }
86    /// The endian of the words of the input file
87    pub fn inendian(&mut self, e: Endian) -> &mut Self {
88        self.input_endian = Some(e);
89        self
90    }
91    /// The number of bits in a word of the input file
92    pub fn wordsize(&mut self, n: usize) -> &mut Self {
93        self.wordsize = Some(n);
94        self
95    }
96    /// The endian of the checksum
97    pub fn outendian(&mut self, e: Endian) -> &mut Self {
98        self.output_endian = Some(e);
99        self
100    }
101    fn signedness(&mut self, s: Signedness) -> &mut Self {
102        self.signedness = Some(s);
103        self
104    }
105    /// The checksum of "123456789", gets checked on creation.
106    pub fn check(&mut self, c: S) -> &mut Self {
107        self.check = Some(c);
108        self
109    }
110    /// An optional name that gets used for display purposes.
111    pub fn name(&mut self, n: &str) -> &mut Self {
112        self.name = Some(String::from(n));
113        self
114    }
115    /// Builds the algorithm, after validating the parameters.
116    pub fn build(&self) -> Result<PolyHash<S>, CheckBuilderErr> {
117        let width = self
118            .width
119            .ok_or(CheckBuilderErr::MissingParameter("width"))?;
120        if width > 64 {
121            return Err(CheckBuilderErr::ValueOutOfRange("width"));
122        }
123        let factor = self
124            .factor
125            .ok_or(CheckBuilderErr::MissingParameter("factor"))?;
126        if factor & S::one() == S::zero() || factor == S::one() {
127            return Err(CheckBuilderErr::ValueOutOfRange("factor"));
128        };
129        let init = mask(width, self.init.unwrap_or_else(S::zero));
130        let addout = mask(width, self.addout.unwrap_or_else(S::zero));
131        let wordsize = self.wordsize.unwrap_or(8);
132        if wordsize == 0 || wordsize % 8 != 0 || wordsize > 64 {
133            return Err(CheckBuilderErr::ValueOutOfRange("wordsize"));
134        }
135        let wordspec = WordSpec {
136            input_endian: self.input_endian.unwrap_or(Endian::Big),
137            wordsize,
138            output_endian: self.output_endian.unwrap_or(Endian::Big),
139            signedness: self.signedness.unwrap_or(Signedness::Unsigned),
140        };
141        let s = PolyHash {
142            width,
143            factor,
144            init,
145            addout,
146            wordspec,
147            name: self.name.clone(),
148        };
149        match self.check {
150            Some(c) => {
151                let mut sum = s.init();
152                for &x in b"123456789" {
153                    sum = s.dig_word(sum, SignedInt::pos(x as u64));
154                }
155                s.finalize(sum);
156                if sum == c {
157                    return Ok(s);
158                }
159                Err(CheckBuilderErr::CheckFail)
160            }
161            None => Ok(s),
162        }
163    }
164}
165
166#[derive(Debug, PartialEq, Eq)]
167pub struct PolyHash<S> {
168    width: usize,
169    factor: S,
170    init: S,
171    addout: S,
172    wordspec: WordSpec,
173    name: Option<String>,
174}
175
176fn mask<S: Modnum>(width: usize, word: S) -> S {
177    if word.bits() > width {
178        word & ((S::one() << width) - S::one())
179    } else {
180        word
181    }
182}
183
184impl<S: Modnum> PolyHash<S> {
185    fn mask(&self, word: S) -> S {
186        mask(self.width, word)
187    }
188
189    pub fn with_options() -> PolyHashBuilder<S> {
190        PolyHashBuilder {
191            width: None,
192            factor: None,
193            init: None,
194            addout: None,
195            input_endian: None,
196            output_endian: None,
197            signedness: None,
198            wordsize: None,
199            check: None,
200            name: None,
201        }
202    }
203}
204
205impl<S: Modnum> Display for PolyHash<S> {
206    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
207        match &self.name {
208            Some(n) => write!(f, "{}", n),
209            None => {
210                write!(
211                    f,
212                    "polyhash width={} factor={:#x} init={:#x} addout={:#x} signedness={}",
213                    self.width, self.factor, self.init, self.addout, self.wordspec.signedness
214                )?;
215                if self.wordspec.word_bytes() != 1 || self.wordspec.input_endian != Endian::Big {
216                    write!(
217                        f,
218                        " in_endian={} wordsize={}",
219                        self.wordspec.input_endian, self.wordspec.wordsize,
220                    )?;
221                };
222                if self.width > 8 {
223                    write!(f, " out_endian={}", self.wordspec.output_endian)?;
224                }
225                Ok(())
226            }
227        }
228    }
229}
230
231impl<S: Modnum> FromStr for PolyHash<S> {
232    type Err = CheckBuilderErr;
233
234    fn from_str(s: &str) -> Result<Self, Self::Err> {
235        PolyHashBuilder::from_str(s)?.build()
236    }
237}
238
239impl<S: Modnum> Digest for PolyHash<S> {
240    type Sum = S;
241
242    fn init(&self) -> Self::Sum {
243        self.init
244    }
245
246    fn dig_word(&self, sum: Self::Sum, word: SignedInt<u64>) -> Self::Sum {
247        let mut value = S::mod_from(word.value, &S::zero());
248        if word.negative {
249            value = value.wrapping_neg();
250        }
251        self.mask(sum.wrapping_mul(&self.factor).wrapping_add(&value))
252    }
253
254    fn finalize(&self, sum: Self::Sum) -> Self::Sum {
255        self.mask(sum.wrapping_add(&self.addout))
256    }
257
258    fn to_bytes(&self, s: Self::Sum) -> Vec<u8> {
259        self.wordspec.output_to_bytes(s, self.width)
260    }
261
262    fn checksum_from_bytes(&self, bytes: &[u8]) -> Option<Self::Sum> {
263        Checksum::from_bytes(bytes, self.wordspec.output_endian, self.width)
264    }
265
266    fn wordspec(&self) -> WordSpec {
267        self.wordspec
268    }
269}
270
271impl<S: Modnum> LinearCheck for PolyHash<S> {
272    type Shift = S;
273
274    fn init_shift(&self) -> Self::Shift {
275        S::one()
276    }
277
278    fn inc_shift(&self, shift: Self::Shift) -> Self::Shift {
279        self.factor.wrapping_mul(&shift)
280    }
281
282    fn shift(&self, sum: Self::Sum, shift: &Self::Shift) -> Self::Sum {
283        self.mask(sum.wrapping_mul(shift))
284    }
285
286    fn add(&self, sum_a: Self::Sum, sum_b: &Self::Sum) -> Self::Sum {
287        self.mask(sum_a.wrapping_add(sum_b))
288    }
289
290    fn negate(&self, sum: Self::Sum) -> Self::Sum {
291        self.mask(S::zero().wrapping_sub(&sum))
292    }
293}
294
295#[cfg(test)]
296mod tests {
297    use super::*;
298    use crate::checksum::tests::*;
299
300    #[test]
301    fn sdbm() {
302        let sdbm = PolyHash::<u32>::with_options()
303            .width(32)
304            .factor(0x1003f)
305            .name("sdbm")
306            .build()
307            .unwrap();
308        test_shifts(&sdbm);
309        test_find(&sdbm);
310        test_prop(&sdbm);
311        check_example(&sdbm, 0x694c5cd2);
312    }
313
314    #[test]
315    fn djb2() {
316        let djb2 = PolyHash::<u32>::with_options()
317            .width(32)
318            .factor(33)
319            .init(5381)
320            .name("djb2")
321            .build()
322            .unwrap();
323        test_shifts(&djb2);
324        test_find(&djb2);
325        test_prop(&djb2);
326        check_example(&djb2, 0x84a046e5);
327    }
328
329    #[test]
330    fn masking() {
331        let polyhash = PolyHash::<u32>::with_options()
332            .width(7)
333            .factor(0x2f)
334            .build()
335            .unwrap();
336        assert_eq!(
337            polyhash
338                .digest(&[0, 18, 232, 236, 87, 255, 203, 100])
339                .unwrap(),
340            0x77
341        );
342    }
343}