delsum_lib/
checksum.rs

1use crate::bitnum::BitNum;
2use crate::endian::Endian;
3use crate::endian::SignedInt;
4use crate::endian::WordSpec;
5use crate::endian::bytes_to_int;
6use crate::utils::SignedInclRange;
7use crate::utils::UnsignedInclRange;
8#[cfg(feature = "parallel")]
9use rayon::prelude::*;
10use std::cmp::Ordering;
11use std::convert::TryFrom;
12use std::fmt::Debug;
13
14/// A basic trait for a checksum where
15/// * init gives an initial state
16/// * dig_word processes a single word
17/// * finalize is applied to get the final sum after all words are processed.
18///
19/// They should be implemented in a way such that the digest default implementation
20/// corresponds to calculating the checksum.
21///
22/// Unlike LinearCheck, it is not really required to be linear yet, but in
23/// context of this application, there is really no use only implementing this.
24pub trait Digest {
25    /// A type that holds the checksum.
26    ///
27    /// Note that in this application, a separate state type that holds the interal state
28    /// and gets converted to a Sum by finalize
29    /// is not really feasable because of the operations LinearCheck would need to do
30    /// both on Sums and interal States, so a single Sum type must be enough.
31    type Sum: Clone + Eq + Ord + Debug + Send + Sync + Checksum;
32    /// Gets an initial sum before the words are processed through the sum.
33    ///
34    /// For instance in the case of crc, the sum type is some integer and the returned value from
35    /// this function could be 0xffffffff (e.g. in the bzip2 crc).
36    fn init(&self) -> Self::Sum;
37    /// Processes a single word from the text.
38    ///
39    /// For a crc, this corresponds to shifting, adding the word and reducing.
40    fn dig_word(&self, sum: Self::Sum, word: SignedInt<u64>) -> Self::Sum;
41    /// After all words are read, this function is called to do some finalization.
42    ///
43    /// In the case of crc, this corresponds to adding a constant at the end
44    /// (and maybe also adding some 0s to the end of the text).
45    fn finalize(&self, sum: Self::Sum) -> Self::Sum;
46    /// Takes the sum and turns it into an array of bytes (may depend on configured endian)
47    fn to_bytes(&self, s: Self::Sum) -> Vec<u8>;
48    /// Takes a slice of bytes and turns it into a sum, if it fits (may depend on configured endian).
49    fn checksum_from_bytes(&self, bytes: &[u8]) -> Option<Self::Sum>;
50    /// Iterate over the words of a file so that digest calculates the checksum
51    fn wordspec(&self) -> WordSpec;
52    /// Takes a reader and calculates the checksums of all words therein.
53    fn digest(&self, buf: &[u8]) -> Result<Self::Sum, std::io::Error> {
54        let wordspec = self.wordspec();
55        if buf.len() % wordspec.word_bytes() != 0 {
56            return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof));
57        }
58        let sum = wordspec
59            .iter_words(buf)
60            .fold(self.init(), |c, s| self.dig_word(c, s));
61        Ok(self.finalize(sum))
62    }
63}
64
65#[derive(Copy, Clone, PartialEq, Eq)]
66pub enum Relativity {
67    Start,
68    End,
69}
70
71impl From<SignedInclRange> for Relativity {
72    fn from(r: SignedInclRange) -> Self {
73        if r.start() < 0 {
74            Relativity::End
75        } else {
76            Relativity::Start
77        }
78    }
79}
80
81/// A checksum that also has some notion of linearity.
82///
83/// What does linearity mean here? In a mathematically pure world, it would mean
84/// that you could add the texts in some way (XOR for crc) and that would be the
85/// same as adding (XORing) both checksums.
86/// However, we live in a world that needs practical considerations, so it's not as clean.
87/// Mostly, this is skewed by `init` and `finalize`.
88///
89/// This trait adds another type, the `Shift` type.
90/// This acts, when applied to an unfinalized sum in the `shift` function, as if appending
91/// `n` 0s to the summed text. For example, in a Fletcher sum, this would simply be an integer
92/// containing `n` and applying the shift corresponds to adding the first sum `n` times to the second one, possible in
93/// constant time. However, in the crc case, this is not possible in constant time just using
94/// the integer containing `n`. In this case, the value of of `x^{8n}` reduced by the generator is stored
95/// and the shift is applied using polynomial multiplication modulo the generator.
96///
97/// The assumptions are here (the `self`s are omitted for clarity):
98/// * `add(a,b)` forms an abeliean group with `negate(a)` as inverse (hereafter, the sum value 0 will be equal to `add(init(), negate(init()))`)
99/// * `shift(s, shift_n(1)) == dig(s, 0u8)`
100/// * `shift(s, shift_n(1))` is bijective in the set of all valid `Sum` values
101/// * `shift(shift(s, shift_n(a)), shift_n(b)) == shift(s, shift_n(a+b))`
102/// * `add(dig_word(s, 0), dig_word(r, 0)) == dig_word(add(s, r), 0)`
103/// * `dig_word(s, k) == dig_word(0, k) + dig_word(s, 0)` (consequently, `dig_word(0, 0) == 0`)
104/// * for all sums `s`, `add(finalize(s), negate(s))` is constant (finalize adds a constant value to the sum)
105/// * all methods without default implementations (including those from `Digest`) should run in constant time (assuming constant `Shift`, `Sum` types)
106///
107/// We can also see Digest::Sum as an abelian group with a group action by shift_n: ℤ -> Aut(Digest::Sum)
108/// (where LinearCheck::Shift represents some subgroup of Aut(Digest::Sum)). If we call σ = shift_n(1),
109/// then for various implementations it is:
110/// * modsum: identity
111/// * fletcher: (s, c) |-> (s, c + s) (note that automorphisms of finite abelian groups generally behave like matrices)
112/// * crc: s |-> s * x^8
113/// * polyhash: s |-> s * factor
114///
115/// dig_word(s, k) is then σ(s) + Sum::from_word(k), shift(s, t) is the application of the automorphism t(s),
116/// add and negate are the usual group operations, init_shift is the identity automorphism and inc_shift corresponds to
117/// t |-> t * σ.
118pub trait LinearCheck: Digest + Send + Sync {
119    /// The Shift type (see trait documentation for more).
120    type Shift: Clone;
121    /// The initial shift corresponding to the identity shift of 0 (see trait documentation for more).
122    fn init_shift(&self) -> Self::Shift;
123    /// Increments the shift by one (see trait documentation for more).
124    fn inc_shift(&self, shift: Self::Shift) -> Self::Shift;
125    /// Applies a shift to a sum (see trait documentation for more).
126    fn shift(&self, sum: Self::Sum, shift: &Self::Shift) -> Self::Sum;
127    /// Adds two sums together (see trait documentation for more).
128    fn add(&self, sum_a: Self::Sum, sum_b: &Self::Sum) -> Self::Sum;
129    /// Gets inverse in the abelian group of `add` (see trait documentation for more).
130    fn negate(&self, sum: Self::Sum) -> Self::Sum;
131    /// Acts as if applying `dig_word(s, 0)` `n` times to to `s` (see trait documentation for more).
132    ///
133    /// Please implement more efficient (equivalent) implementation for each type if possible.
134    fn shift_n(&self, n: usize) -> Self::Shift {
135        let mut shift = self.init_shift();
136        for _ in 0..n {
137            shift = self.inc_shift(shift);
138        }
139        shift
140    }
141
142    /// Given some bytes and a target sum, determines all segments in the bytes that have that
143    /// particular checksum.
144    ///
145    /// Each element of the return value contains a tuple consisting of an array of possible segment starts
146    /// and an array of possible segment ends. If there are multiple starts or ends, each possible combination
147    /// has the target checksum.
148    ///
149    /// This function has a high space usage per byte: for `n` bytes, it uses a total space of `n*(8 + 2*sizeof(Sum))` bytes.
150    /// The time is bounded by the runtime of the sort algorithm, which is around `n*log(n)`.
151    /// If Hashtables were used, it could be done in linear time, but they take too much space.
152    fn find_segments<'a>(
153        &self,
154        bytes: &'a [Vec<u8>],
155        sum: &'a [impl Fn(&'a [u8], usize) -> Option<Self::Sum> + Send + Sync],
156        rel: Relativity,
157    ) -> Vec<RangePair> {
158        if bytes.is_empty() {
159            return Vec::new();
160        }
161        let min_len = bytes.iter().map(|x| x.len()).min().unwrap();
162        let start_range = SignedInclRange::new(0, (min_len - 1) as isize);
163        let end_range = match rel {
164            Relativity::Start => SignedInclRange::new(0, (min_len - 1) as isize),
165            Relativity::End => SignedInclRange::new(-(min_len as isize), -1),
166        };
167        let (start_range, end_range) = match (start_range, end_range) {
168            (Some(s), Some(e)) => (s, e),
169            (None, _) | (_, None) => return Vec::new(),
170        };
171        if u32::try_from(bytes[0].len()).is_err() {
172            // only support 32-bit length files for now, since a usize for every byte would take a lot of space
173            panic!("File must be under 4GiB!");
174        }
175        self.find_segments_range(bytes, sum, start_range, end_range)
176    }
177
178    fn find_segments_range<'a>(
179        &self,
180        bytes: &'a [Vec<u8>],
181        sum: &'a [impl Fn(&'a [u8], usize) -> Option<Self::Sum> + Send + Sync],
182        start_range: SignedInclRange,
183        end_range: SignedInclRange,
184    ) -> Vec<RangePair> {
185        let mut ret = Vec::new();
186        let min_len = bytes.iter().map(|x| x.len()).min().unwrap();
187        let step = self.wordspec().word_bytes();
188
189        if Relativity::from(start_range) != Relativity::from(end_range)
190            && bytes
191                .windows(2)
192                .map(|x| x[0].len() % step != x[1].len() % step)
193                .any(bool::from)
194        {
195            // in case one of the ranges is relative to the start and the other relative to the end,
196            // and there are two files which have a length difference that is not a multiple of the step
197            // lengths, then any checksum on the first file that is a multiple of `step` in length would have
198            // a corresponding checksum over a length that is not a multiple of `step`, therefore we return
199            // here early since this case isn't actually caught by the general code and would result
200            // in unexpected results
201            return Vec::new();
202        }
203
204        // limit start and end range to actual offsets lying within the smallest file
205        let (start_range, end_range) = match (start_range.limit(min_len), end_range.limit(min_len))
206        {
207            (None, _) | (_, None) => return Vec::new(),
208            (Some(start), Some(end)) => (start, end),
209        };
210        for offset in 0..step {
211            let current_start_range =
212                match start_range.set_start(start_range.start() + offset as isize) {
213                    Some(x) => x,
214                    None => break,
215                };
216            ret.append(
217                &mut find_segments_aligned(self, bytes, sum, current_start_range, end_range)
218                    .unwrap_or_else(Vec::new),
219            );
220        }
221        ret.sort_unstable();
222        ret
223    }
224}
225
226fn find_segments_aligned<'a, S: LinearCheck + ?Sized>(
227    summer: &S,
228    bytes: &'a [Vec<u8>],
229    sum: &'a [impl Fn(&'a [u8], usize) -> Option<S::Sum> + Send + Sync],
230    start_range: SignedInclRange,
231    end_range: SignedInclRange,
232) -> Option<Vec<RangePair>> {
233    let min_len = bytes.iter().map(|x| x.len()).min().unwrap();
234    let (start_range, end_range) = normalize_range(
235        start_range,
236        end_range,
237        summer.wordspec().word_bytes(),
238        min_len,
239    )?;
240    #[cfg(feature = "parallel")]
241    let (start_presums, end_presums) = bytes
242        .par_iter()
243        .zip(sum.par_iter())
244        .map(|(b, s)| {
245            presums(
246                summer,
247                b,
248                s,
249                // since they are already normalized, this should work
250                start_range.to_unsigned(b.len()).unwrap(),
251                end_range.to_unsigned(b.len()).unwrap(),
252            )
253        })
254        .unzip();
255    #[cfg(not(feature = "parallel"))]
256    let (start_presums, end_presums) = bytes
257        .iter()
258        .zip(sum.iter())
259        .map(|(b, s)| {
260            presums(
261                summer,
262                b,
263                &s,
264                // since they are already normalized, this should work
265                start_range.to_unsigned(b.len()).unwrap(),
266                end_range.to_unsigned(b.len()).unwrap(),
267            )
268        })
269        .unzip();
270    let start_preset = PresumSet::new(start_presums);
271    let end_preset = PresumSet::new(end_presums);
272    let mut ret_vec = Vec::new();
273
274    let step = summer.wordspec().word_bytes();
275    let least_start_range_start = start_range.to_unsigned(min_len)?.start();
276    let least_end_range_start = end_range.to_unsigned(min_len)?.start();
277    for (a, b) in start_preset.equal_pairs(&end_preset) {
278        let starts: Vec<_> = a
279            .iter()
280            .map(|x| usize::try_from(*x).unwrap() * step + least_start_range_start)
281            .collect();
282        let ends: Vec<_> = b
283            .iter()
284            .map(|x| usize::try_from(*x).unwrap() * step + least_end_range_start)
285            .collect();
286        let min_start = *starts.iter().min().unwrap_or(&min_len);
287        let max_end = *ends.iter().max().unwrap_or(&0);
288        let rel_ends: Vec<_> = ends
289            .into_iter()
290            .filter(|x| x >= &min_start)
291            .map(|x| match end_range.into() {
292                Relativity::Start => isize::try_from(x).unwrap(),
293                Relativity::End => -isize::try_from(min_len - x).unwrap(),
294            })
295            .collect();
296        let rel_starts = starts
297            .into_iter()
298            .filter(|x| x <= &max_end)
299            .map(|x| match start_range.into() {
300                Relativity::Start => isize::try_from(x).unwrap(),
301                Relativity::End => -isize::try_from(min_len - x).unwrap(),
302            })
303            .collect();
304        if !rel_ends.is_empty() {
305            ret_vec.push((rel_starts, rel_ends));
306        }
307    }
308    Some(ret_vec)
309}
310
311// this takes care of shortening the ranges so that the least presums are calculated,
312// this is done before calling presums because presums does not know the lengths of the
313// other files and we might get different lengths for different files
314fn normalize_range(
315    mut start_range: SignedInclRange,
316    mut end_range: SignedInclRange,
317    step: usize,
318    min_len: usize,
319) -> Option<(SignedInclRange, SignedInclRange)> {
320    let mut start = start_range.to_unsigned(min_len)?;
321    let mut end = end_range.to_unsigned(min_len)?;
322    end = end.set_end(end.end().max(start.start() + step - 1))?;
323
324    // the "middle" part must be in the total range
325    end = end.set_start(end.start().clamp(start.start() + step - 1, end.end()))?;
326
327    // align them to be a multiple of step away from start.start
328    end = end
329        .set_end(start.start() + step - 1 + (end.end() - start.start() - step + 1) / step * step)?
330        .set_start(start.start() + step - 1 + (end.start() - start.start()) / step * step)?;
331    // clamp and align the end of the start range too
332    start = start.set_end(start.end().clamp(start.start(), end.end()))?;
333    start = start.set_end(start.start() + (start.end() - start.start()) / step * step)?;
334
335    let to_rel = |x: SignedInclRange| {
336        if x.start() >= 0 {
337            Relativity::Start
338        } else {
339            Relativity::End
340        }
341    };
342    start_range = start.to_signed(to_rel(start_range), to_rel(start_range), min_len)?;
343    end_range = end.to_signed(to_rel(end_range), to_rel(end_range), min_len)?;
344    Some((start_range, end_range))
345}
346
347fn presums<'a, S: LinearCheck + ?Sized>(
348    summer: &S,
349    bytes: &'a [u8],
350    sum: impl Fn(&'a [u8], usize) -> Option<S::Sum>,
351    start_range: UnsignedInclRange,
352    end_range: UnsignedInclRange,
353) -> (Vec<S::Sum>, Vec<Option<S::Sum>>) {
354    if start_range.start() > start_range.end() || end_range.start() > end_range.end() {
355        return (Vec::new(), Vec::new());
356    }
357    if start_range.start() >= bytes.len() {
358        return (Vec::new(), Vec::new());
359    }
360    // we calculate two presum arrays, one for the starting values and one for the end values
361    let step = summer.wordspec().word_bytes();
362    let mut state = summer.init();
363    let mut start_presums = Vec::with_capacity(start_range.len() / step);
364    let mut end_presums = Vec::with_capacity(end_range.len() / step);
365    let neg_init = summer.negate(summer.init());
366    let iter_range = start_range.start()..=end_range.end();
367    for (i, c) in summer
368        .wordspec()
369        .iter_words(&bytes[iter_range.clone()])
370        .enumerate()
371        .map(|(i, c)| (i * step + start_range.start(), c))
372    {
373        if start_range.contains(i) {
374            // from the startsums, we substract the init value of the checksum
375            start_presums.push(summer.add(state.clone(), &neg_init));
376        }
377        state = summer.dig_word(state, c);
378        if end_range.contains(i + step - 1) {
379            let checksum = sum(bytes, i + step);
380            // from the endsums, we finalize them and subtract the given final sum
381            let endstate = checksum
382                .map(|chksum| summer.add(summer.finalize(state.clone()), &summer.negate(chksum)));
383            end_presums.push(endstate);
384        }
385    }
386    let mut start_index = start_presums.len();
387    let mut end_index = end_presums.len();
388    // we then shift checksums to length of file
389    let mut shift = summer.init_shift();
390    for i in iter_range
391        .rev()
392        .filter(|i| (i - start_range.start()) % step == 0)
393    {
394        if end_range.contains(i + step - 1) {
395            end_index -= 1;
396            end_presums[end_index] = end_presums[end_index]
397                .clone()
398                .map(|end_presum| summer.shift(end_presum, &shift));
399        }
400        shift = summer.inc_shift(shift);
401        if start_range.contains(i) {
402            start_index -= 1;
403            start_presums[start_index] = summer.shift(start_presums[start_index].clone(), &shift)
404        }
405    }
406    assert_eq!(start_index, 0);
407    assert_eq!(end_index, 0);
408    // This has the effect that, when substracting the n'th startsum from the m'th endsum, we get the checksum
409    // from n to m, minus the final sum (all shifted by (len-m)), which is 0 exactly when the checksum from n to m is equal to
410    // the final sum, which means that start_presums[n] = end_presums[m]
411    //
412    // we then sort an array of indices so equal elements are adjacent, allowing us to easily get the equal elements
413    // Anyway, here's some cryptic stuff i made up and have to put at least *somewhere* so i don't forget it
414    // 		        ([0..m] + f - s)*x^(k-m) - ([0..n] - i)*x^(k-n)
415    // (4)	        = ([0..m] + f - s)*x^(k-m) - ([0..n] - i)*x^(m-n)*x^(k-m)
416    // (1) (5)		= ([0..m] + f - s - [0..n]*x^(n-m) + i*x^(m-n))*x^(k-m)
417    // (2) (6)		= ([0..n]*x^(m-n) + [n..m] + f - s - [0 ..n]*x^(n-m) + i*x^(m-n))*x^(k-m)
418    // (1)		    = ([n..m] + f - s + i*x^(m-n))*x^(k-m)
419    // (6)		    = (i*[n..m] + f - s)*x^(k-m)
420    // (7)		    = (finalize(i*[n..m]) - s)*x^(k-m)
421    // therefore
422    //                  (finalize(i*[n..m]) - s)*x^(k-m) == 0
423    // (2) (3) (6)  <=> finalize(i*[n..m]) - s           == 0
424    // (1)          <=> finalize(i*[n..m])               == s
425    (start_presums, end_presums)
426}
427
428pub fn const_sum<T>(sum: T) -> impl Fn(&[u8], usize) -> T + Send + Sync
429where
430    T: Clone + Send + Sync,
431{
432    #[inline]
433    move |_, _| sum.clone()
434}
435
436pub type RangePair = (Vec<isize>, Vec<isize>);
437
438/// A struct for helping to sort and get duplicates of arrays of arrays.
439#[derive(Debug)]
440struct PresumSet<Sum: Clone + Eq + Ord + Debug> {
441    idx: Vec<u32>,
442    presum: Vec<Vec<Sum>>,
443}
444
445fn cmp_idx_generic<A, B>(
446    presum_a: &[Vec<A>],
447    a: u32,
448    presum_b: &[Vec<B>],
449    b: u32,
450    cmp: impl Fn(&A, &B) -> Ordering,
451) -> Ordering {
452    for (x, y) in presum_a.iter().zip(presum_b.iter()) {
453        let cmp = cmp(&x[a as usize], &y[b as usize]);
454        if cmp != Ordering::Equal {
455            return cmp;
456        }
457    }
458    Ordering::Equal
459}
460
461/// Compares all elements of the first vector at an index to the ones of the second vector lexicographically (assuming same length).
462fn cmp_idx<Sum: Ord>(presum_a: &[Vec<Sum>], a: u32, presum_b: &[Vec<Sum>], b: u32) -> Ordering {
463    cmp_idx_generic(presum_a, a, presum_b, b, |x, y| x.cmp(y))
464}
465
466fn cmp_idx_opt<Sum: Ord>(
467    presum_a: &[Vec<Sum>],
468    a: u32,
469    presum_b: &[Vec<Option<Sum>>],
470    b: u32,
471) -> Ordering {
472    cmp_idx_generic(presum_a, a, presum_b, b, |x, y| match y {
473        Some(y) => x.cmp(y),
474        None => Ordering::Greater,
475    })
476}
477
478impl<Sum: Clone + Eq + Ord + Debug + Send + Sync> PresumSet<Sum> {
479    /// Gets a new PresumSet. Gets sorted on construction.
480    fn new(presum: Vec<Vec<Sum>>) -> Self {
481        let firstlen = presum[0].len();
482        // check that all sum arrays are of the same length
483        for x in presum.iter() {
484            assert_eq!(firstlen, x.len());
485        }
486        // vector of all indices
487        let mut idxvec: Vec<_> = (0..firstlen as u32).collect();
488        // get a permutation vector representing the sort of the presum arrays first by value and then by index
489
490        #[cfg(feature = "parallel")]
491        idxvec.par_sort_unstable_by(|a, b| cmp_idx(&presum, *a, &presum, *b).then(a.cmp(b)));
492        #[cfg(not(feature = "parallel"))]
493        idxvec.sort_unstable_by(|a, b| cmp_idx(&presum, *a, &presum, *b).then(a.cmp(&b)));
494        Self {
495            idx: idxvec,
496            presum,
497        }
498    }
499    /// Finds groups of indices equal elements in the first set and the second set and
500    /// returns them for each equal array.
501    fn equal_pairs(&self, ends: &PresumSet<Option<Sum>>) -> Vec<(Vec<u32>, Vec<u32>)> {
502        let mut ret = Vec::new();
503        let mut a_idx = 0;
504        let mut b_idx = 0;
505        while a_idx < self.idx.len() && b_idx < ends.idx.len() {
506            let apos = self.idx[a_idx];
507            let bpos = ends.idx[b_idx];
508            match cmp_idx_opt(&self.presum, apos, &ends.presum, bpos) {
509                Ordering::Less => {
510                    a_idx += 1;
511                }
512                Ordering::Greater => {
513                    b_idx += 1;
514                }
515                Ordering::Equal => {
516                    let mut n_a = 0;
517                    // gets all runs of equal elements in a and b array
518                    for x in &self.idx[a_idx..] {
519                        if cmp_idx(&self.presum, apos, &self.presum, *x) == Ordering::Equal {
520                            n_a += 1;
521                        } else {
522                            break;
523                        }
524                    }
525                    let mut n_b = 0;
526                    for x in &ends.idx[b_idx..] {
527                        if cmp_idx(&ends.presum, bpos, &ends.presum, *x) == Ordering::Equal {
528                            n_b += 1;
529                        } else {
530                            break;
531                        }
532                    }
533                    let mut a_vec = Vec::new();
534                    a_vec.extend_from_slice(&self.idx[a_idx..a_idx + n_a]);
535                    let mut b_vec = Vec::new();
536                    b_vec.extend_from_slice(&ends.idx[b_idx..b_idx + n_b]);
537                    ret.push((a_vec, b_vec));
538                    // puts indexes beyond equal elements
539                    a_idx += n_a;
540                    b_idx += n_b;
541                }
542            }
543        }
544        // sort it, for good measure
545        ret.sort_unstable();
546        ret
547    }
548}
549
550#[derive(Clone, PartialEq, Eq, Debug, Hash)]
551pub enum CheckBuilderErr {
552    /// The checksum given on construction does not match
553    /// the checksum of "123456789"
554    CheckFail,
555    /// A mandatory parameter is missing
556    MissingParameter(&'static str),
557    /// A value of a parameter is out of range
558    ValueOutOfRange(&'static str),
559    /// A value of a parameter is not a hex value
560    ValueNotHex(&'static str),
561    /// The given string given to the from_str function
562    /// could not be interpreted correctly,
563    ///
564    /// The String indicates the key with the malformant.
565    MalformedString(String),
566    /// A key given to the from_str function is not known
567    UnknownKey(String),
568}
569
570impl std::fmt::Display for CheckBuilderErr {
571    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
572        use CheckBuilderErr::*;
573        match self {
574            CheckFail => write!(f, "Failed checksum test"),
575            MissingParameter(para) => write!(f, "Missing parameter '{}'", para),
576            ValueOutOfRange(key) => write!(f, "Value for parameter '{}' invalid", key),
577            ValueNotHex(key) => write!(f, "Value for parameter '{}' misses a '0x' prefix", key),
578            MalformedString(key) => {
579                if key.is_empty() {
580                    write!(f, "Malformed input string")
581                } else {
582                    write!(f, "Malformed input string in parameter '{}'", key)
583                }
584            }
585            UnknownKey(key) => write!(f, "Unknown key '{}'", key),
586        }
587    }
588}
589
590impl std::error::Error for CheckBuilderErr {}
591
592pub(crate) fn filter_opt_err<T, E>(
593    it: impl Iterator<Item = Result<T, Option<E>>>,
594) -> impl Iterator<Item = Result<T, E>> {
595    it.filter_map(|x| match x {
596        Ok(a) => Some(Ok(a)),
597        Err(Some(e)) => Some(Err(e)),
598        Err(None) => None,
599    })
600}
601
602pub(crate) fn parse_hex<Sum: BitNum>(s: &str, param_name: &'static str) -> Result<Sum, CheckBuilderErr> {
603    let Some(s) = s.strip_prefix("0x") else {
604        return Err(CheckBuilderErr::ValueNotHex(param_name));
605    };
606
607    Sum::from_hex(s).map_err(|_| CheckBuilderErr::MalformedString(param_name.to_string()))
608}
609
610#[derive(Clone, PartialEq, Eq, Debug, Hash)]
611pub enum CheckReverserError {
612    MissingParameter(&'static str),
613    UnsuitableFiles(&'static str),
614    ChecksumFileMismatch,
615}
616
617impl std::fmt::Display for CheckReverserError {
618    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
619        use CheckReverserError::*;
620        match self {
621            MissingParameter(s) => write!(f, "Missing Parameters: {}", s),
622            UnsuitableFiles(s) => write!(
623                f,
624                "Could not reverse because \
625                files are unsuitable: {}",
626                s
627            ),
628            ChecksumFileMismatch => write!(
629                f,
630                "Number of files does not \
631                match number of checksums"
632            ),
633        }
634    }
635}
636
637impl std::error::Error for CheckReverserError {}
638
639/// Trait for checksums
640pub trait Checksum: Sized {
641    fn to_width_str(&self, width: usize) -> String;
642    fn from_bytes(bytes: &[u8], endian: Endian, width: usize) -> Option<Self>;
643}
644
645// default implementation for normal numbers
646impl<T: crate::bitnum::BitNum> Checksum for T {
647    fn to_width_str(&self, width: usize) -> String {
648        if width == 0 {
649            return String::new();
650        }
651        let w = (width - 1) / 4 + 1;
652        format!("{:0width$x}", self, width = w)
653    }
654
655    fn from_bytes(bytes: &[u8], endian: Endian, width: usize) -> Option<Self> {
656        let byte_count = width.div_ceil(8);
657        if bytes.len() > byte_count {
658            return None;
659        }
660
661        let sum = bytes_to_int::<T>(bytes, endian);
662        if width % 8 != 0 && T::one() << width <= sum {
663            return None;
664        }
665
666        Some(sum)
667    }
668}
669
670#[allow(dead_code)]
671#[cfg(test)]
672pub(crate) mod tests {
673    use super::*;
674    use crate::checksum::Relativity;
675    use quickcheck::{Arbitrary, Gen, TestResult};
676    use rand::Rng;
677    static EXAMPLE_TEXT: &str = r#"Als Gregor Samsa eines Morgens aus unruhigen Träumen erwachte, fand er sich in
678seinem Bett zu einem ungeheueren Ungeziefer verwandelt. Er lag auf seinem
679panzerartig harten Rücken und sah, wenn er den Kopf ein wenig hob, seinen
680gewölbten, braunen, von bogenförmigen Versteifungen geteilten Bauch, auf
681dessen Höhe sich die Bettdecke, zum gänzlichen Niedergleiten bereit, kaum
682noch erhalten konnte. Seine vielen, im Vergleich zu seinem sonstigen Umfang
683kläglich dünnen Beine flimmerten ihm hilflos vor den Augen.
684
685»Was ist mit mir geschehen?« dachte er. Es war kein Traum, sein Zimmer, ein
686richtiges, nur etwas zu kleines Menschenzimmer, lag ruhig zwischen den vier
687wohlbekannten Wänden, über dem Tisch, auf dem eine auseinandergepackte
688Musterkollektion von Tuchwaren ausgebreitet war – Samsa war Reisender –,
689hing das Bild, das er vor kurzem aus einer illustrierten Zeitschrift
690ausgeschnitten und in einem hübschen, vergoldeten Rahmen untergebracht hatte.
691Es stellte eine Dame dar, die, mit einem Pelzhut und einer Pelzboa versehen,
692aufrecht dasaß und einen schweren Pelzmuff, in dem ihr ganzer Unterarm
693verschwunden war, dem Beschauer entgegenhob.
694
695Gregors Blick richtete sich dann zum Fenster, und das trübe Wetter – man
696hörte Regentropfen auf das Fensterblech aufschlagen – machte ihn ganz
697melancholisch. »Wie wäre es, wenn ich noch ein wenig weiterschliefe und alle
698Narrheiten vergäße,« dachte er, aber das war gänzlich undurchführbar, denn
699er war gewöhnt, auf der rechten Seite zu schlafen, konnte sich aber in seinem
700gegenwärtigen Zustand nicht in diese Lage bringen. Mit welcher Kraft er sich
701auch auf die rechte Seite warf, immer wieder schaukelte er in die Rückenlage
702zurück. Er versuchte es wohl hundertmal, schloß die Augen, um die zappelnden
703Beine nicht sehen zu müssen und ließ erst ab, als er in der Seite einen noch
704nie gefühlten, leichten, dumpfen Schmerz zu fühlen begann.
705"#;
706    pub fn test_shifts<T: LinearCheck>(chk: &T) {
707        let test_sum = chk
708            .digest(&b"T\x00\x00\x00E\x00\x00\x00S\x00\x00\x00\x00T"[..])
709            .unwrap();
710        let shift3 = chk.shift_n(3);
711        let shift4 = chk.inc_shift(shift3.clone());
712        let mut new_sum = chk.init();
713        let pos = SignedInt::pos;
714        new_sum = chk.dig_word(new_sum, pos(b'T' as u64));
715        new_sum = chk.shift(new_sum, &shift3);
716        new_sum = chk.dig_word(new_sum, pos(b'E' as u64));
717        new_sum = chk.shift(new_sum, &shift3);
718        new_sum = chk.dig_word(new_sum, pos(b'S' as u64));
719        new_sum = chk.shift(new_sum, &shift4);
720        new_sum = chk.dig_word(new_sum, pos(b'T' as u64));
721        assert_eq!(test_sum, chk.finalize(new_sum));
722    }
723    pub fn test_find<L: LinearCheck>(chk: &L) {
724        let sum_1_9 = chk.digest(&b"123456789"[..]).unwrap();
725        let sum_9_1 = chk.digest(&b"987654321"[..]).unwrap();
726        let sum_1_9_1 = chk.digest(&b"12345678987654321"[..]).unwrap();
727        assert_eq!(
728            chk.find_segments(
729                &[Vec::from("a123456789X1235H123456789Y")],
730                &[const_sum(Some(sum_1_9.clone()))],
731                Relativity::Start
732            ),
733            vec![(vec![1], vec![9]), (vec![16], vec![24])]
734        );
735        assert_eq!(
736            chk.find_segments(
737                &[
738                    Vec::from("XX98765432123456789XXX"),
739                    Vec::from("XX12345678987654321XX")
740                ],
741                &[
742                    const_sum(Some(sum_1_9.clone())),
743                    const_sum(Some(sum_9_1.clone()))
744                ],
745                Relativity::Start
746            ),
747            vec![(vec![10], vec![18])]
748        );
749        assert_eq!(
750            chk.find_segments(
751                &[
752                    Vec::from("XXX12345678987654321AndSoOn"),
753                    Vec::from("ABC123456789.super."),
754                    Vec::from("Za!987654321ergrfrf")
755                ],
756                &[Some(sum_1_9_1), Some(sum_1_9), Some(sum_9_1)].map(const_sum),
757                Relativity::End
758            ),
759            vec![(vec![3], vec![-8])]
760        )
761    }
762    pub fn check_example<D: Digest>(chk: &D, sum: D::Sum) {
763        assert_eq!(chk.digest(EXAMPLE_TEXT.as_bytes()).unwrap(), sum)
764    }
765    // this was written before including quickcheck, hence this manual property testing implementation
766    pub fn test_prop<L: LinearCheck>(chk: &L) {
767        let mut test_values = Vec::new();
768        test_values.push(chk.init());
769        let e = &chk.add(chk.negate(chk.init()), &chk.init());
770        test_values.push(e.clone());
771        let mut rng = rand::rng();
772        let mut s = chk.init();
773        while test_values.len() < 100 {
774            s = chk.dig_word(s, SignedInt::pos(rng.random()));
775            if rng.random_bool(0.01) {
776                test_values.push(s.clone());
777            }
778        }
779        for a in test_values.iter() {
780            check_neutral(chk, e, a);
781            check_invert(chk, e, a);
782            check_shift1(chk, a);
783            check_shiftn(chk, a);
784            check_bil(chk, e, a);
785            check_fin(chk, e, a);
786            for b in test_values.iter() {
787                check_commut(chk, a, b);
788                check_dist(chk, a, b);
789                for c in test_values.iter() {
790                    check_assoc(chk, a, b, c);
791                }
792            }
793        }
794    }
795    fn check_assoc<L: LinearCheck>(chk: &L, a: &L::Sum, b: &L::Sum, c: &L::Sum) {
796        assert_eq!(
797            chk.add(chk.add(a.clone(), b), c),
798            chk.add(a.clone(), &chk.add(b.clone(), c)),
799            "Associativity Fail: ({:x?} + {:x?}) + {:x?} != {:x?} + ({:x?} + {:x?})",
800            a,
801            b,
802            c,
803            a,
804            b,
805            c
806        );
807    }
808    fn check_neutral<L: LinearCheck>(chk: &L, e: &L::Sum, a: &L::Sum) {
809        assert_eq!(
810            chk.add(a.clone(), e),
811            a.clone(),
812            "Neutral Element Fail: {:x?} + {:x?} != {:x?}",
813            a,
814            e,
815            a
816        );
817    }
818    fn check_commut<L: LinearCheck>(chk: &L, a: &L::Sum, b: &L::Sum) {
819        assert_eq!(
820            chk.add(b.clone(), a),
821            chk.add(a.clone(), b),
822            "Commutativity Fail: {:x?} + {:x?} != {:x?} + {:x?}",
823            b,
824            a,
825            a,
826            b
827        );
828    }
829    fn check_invert<L: LinearCheck>(chk: &L, e: &L::Sum, a: &L::Sum) {
830        assert_eq!(
831            chk.add(chk.negate(a.clone()), a),
832            e.clone(),
833            "Inversion Fail: -{:x?} + {:x?} != {:x?}",
834            a,
835            a,
836            e
837        );
838    }
839    fn check_shift1<L: LinearCheck>(chk: &L, a: &L::Sum) {
840        assert_eq!(
841            chk.shift(a.clone(), &chk.shift_n(1)),
842            chk.dig_word(a.clone(), SignedInt::pos(0u64)),
843            "Shift1 Fail: shift({:x?}, shift_n1(1)) != dig_word({:x?}, 0u8)",
844            a,
845            a
846        );
847    }
848    fn check_shiftn<L: LinearCheck>(chk: &L, a: &L::Sum) {
849        for x in &[1, 5, 16, 1094, 5412] {
850            let shifted = chk.shift(a.clone(), &chk.shift_n(*x));
851            for y in &[4, 526, 0, 41, 4321] {
852                assert_eq!(
853                    chk.shift(shifted.clone(), &chk.shift_n(*y)),
854                    chk.shift(a.clone(), &chk.shift_n(x + y)),
855                    "Shiftn Fail: shift(shift({:x?}, shift_n({:?})), shift_n({:?})) != shift({:x?}, shift_n({} + {}))",
856                    a,
857                    x,
858                    y,
859                    a,
860                    x,
861                    y
862                );
863            }
864        }
865    }
866    fn check_dist<L: LinearCheck>(chk: &L, a: &L::Sum, b: &L::Sum) {
867        assert_eq!(
868            chk.add(
869                chk.dig_word(a.clone(), SignedInt::pos(0u64)),
870                &chk.dig_word(b.clone(), SignedInt::pos(0u64))
871            ),
872            chk.dig_word(chk.add(a.clone(), b), SignedInt::pos(0u64)),
873            "Distributivity Fail: dig_word({:x?}, 0u8) + dig_word({:x?}, 0u8) != dig_word({:x?} + {:x?}, 0u8)",
874            a,
875            b,
876            a,
877            b
878        );
879    }
880    fn check_bil<L: LinearCheck>(chk: &L, e: &L::Sum, a: &L::Sum) {
881        for k in 0u64..=255 {
882            assert_eq!(
883                chk.dig_word(a.clone(), SignedInt::pos(k)),
884                chk.add(
885                    chk.dig_word(a.clone(), SignedInt::pos(0u64)),
886                    &chk.dig_word(e.clone(), SignedInt::pos(k))
887                ),
888                "Bilinearity Fail: dig_word({:x?}, {:#x}) != dig_word({:x?}, 0u8) + dig_word(0, {:#x}u8)",
889                a,
890                k,
891                a,
892                k
893            )
894        }
895    }
896    fn check_fin<L: LinearCheck>(chk: &L, e: &L::Sum, a: &L::Sum) {
897        assert_eq!(
898            chk.add(chk.finalize(a.clone()), &chk.negate(a.clone())),
899            chk.finalize(e.clone()),
900            "Finalize Linearity Fail: finalize({:x?}) - {:x?} != {:x?}",
901            a,
902            a,
903            &chk.finalize(e.clone())
904        )
905    }
906    /// For generating files for tests so that there are at least 3 with one having a different length
907    /// and the individual lengths are multiples of 8 so that power-of-two wordsizes can be tested
908    #[derive(Clone, PartialEq, Eq, Debug)]
909    pub struct ReverseFileSet(pub Vec<Vec<u8>>);
910    impl Arbitrary for ReverseFileSet {
911        fn arbitrary(g: &mut Gen) -> Self {
912            let new_size = |q: &mut Gen| {
913                let s = q.size() / 8;
914                8 * (usize::arbitrary(q) % s)
915            };
916            let n_files = (usize::arbitrary(g) % g.size()) + 3;
917            let mut lengths = Vec::new();
918            for _ in 0..n_files {
919                lengths.push(new_size(g));
920            }
921            if lengths.iter().all(|x| *x == lengths[0]) {
922                lengths[0] += 8;
923            }
924            let mut ret = Vec::with_capacity(n_files);
925            for new_len in lengths {
926                let mut cur_file = Vec::with_capacity(new_len);
927                for _ in 0..new_len {
928                    cur_file.push(u8::arbitrary(g));
929                }
930                ret.push(cur_file);
931            }
932            ret.sort_by(|a, b| a.len().cmp(&b.len()).then(a.cmp(b)).reverse());
933            ReverseFileSet(ret)
934        }
935        fn shrink(&self) -> Box<dyn Iterator<Item = Self>> {
936            let vec = self.0.clone();
937            Box::new((1..=(vec.len() - 3)).map(move |x| ReverseFileSet(Vec::from(&vec[x..]))))
938        }
939    }
940    impl ReverseFileSet {
941        pub fn with_checksums<T: Digest>(&self, dig: &T) -> Vec<(&[u8], Vec<u8>)> {
942            self.0
943                .iter()
944                .map(|f| {
945                    let checksum = dig.to_bytes(dig.digest(f.as_slice()).unwrap());
946                    (f.as_slice(), checksum)
947                })
948                .collect()
949        }
950        pub fn check_matching<T, I>(&self, reference: &T, result_iter: I) -> TestResult
951        where
952            T: Digest + Eq + std::fmt::Display,
953            I: Iterator<Item = Result<T, CheckReverserError>>,
954            T::Sum: std::fmt::LowerHex,
955        {
956            let chk_files = self.with_checksums(reference);
957            let mut has_appeared = false;
958            for (count, modsum_loop) in result_iter.enumerate() {
959                if count > 1000 {
960                    return TestResult::discard();
961                }
962                let modsum_loop = match modsum_loop {
963                    Err(_) => return TestResult::discard(),
964                    Ok(x) => x,
965                };
966                if &modsum_loop == reference {
967                    has_appeared = true;
968                }
969                for (file, original_check) in &chk_files {
970                    let checksum = modsum_loop.to_bytes(modsum_loop.digest(file).unwrap());
971                    if &checksum != original_check {
972                        eprint!("expected checksum: ");
973                        for x in original_check {
974                            eprint!("{:02x}", x);
975                        }
976                        eprintln!();
977                        eprint!("actual checksum: ");
978                        for x in checksum {
979                            eprint!("{:02x}", x);
980                        }
981                        eprintln!();
982                        eprintln!("checksum: {}", modsum_loop);
983                        eprintln!("original checksum: {}", reference);
984                        return TestResult::failed();
985                    }
986                }
987            }
988            if !has_appeared {
989                eprintln!("{} has not appeared!", reference);
990                return TestResult::failed();
991            }
992            TestResult::passed()
993        }
994    }
995}
delsum_lib/checksum.rs

delsum_lib/
checksum.rs