Skip to main content

delsum_lib/
lib.rs

1mod bitnum;
2pub mod checksum;
3mod divisors;
4mod keyval;
5pub mod utils;
6
7pub mod crc;
8pub(crate) mod endian;
9pub mod fletcher;
10pub mod modsum;
11pub mod polyhash;
12
13use bitnum::BitNum;
14use checksum::{CheckBuilderErr, CheckReverserError, const_sum};
15use checksum::{Digest, LinearCheck, RangePair};
16use crc::{CRC, CrcBuilder, reverse_crc};
17use fletcher::{Fletcher, FletcherBuilder, reverse_fletcher};
18use modsum::{ModSum, ModSumBuilder, reverse_modsum};
19use num_traits::Zero;
20use polyhash::{PolyHash, PolyHashBuilder, reverse_polyhash};
21use std::cmp::Ordering;
22use std::error::Error;
23use std::fmt::Display;
24use std::str::FromStr;
25use std::sync::Arc;
26use utils::SignedInclRange;
27#[cfg(feature = "parallel")]
28use {
29    crc::reverse_crc_para, fletcher::reverse_fletcher_para, polyhash::reverse_polyhash_para,
30    rayon::prelude::*,
31};
32#[cfg(test)]
33#[macro_use(quickcheck)]
34extern crate quickcheck_macros;
35
36#[derive(Debug, Clone, PartialEq, Eq)]
37pub enum DelsumError {
38    ModelError(CheckBuilderErr),
39    /// The number of files does not agree with the number of checksums
40    ChecksumCountMismatch(&'static str),
41    WordsizeMisalignment,
42}
43
44impl From<CheckBuilderErr> for DelsumError {
45    fn from(e: CheckBuilderErr) -> Self {
46        DelsumError::ModelError(e)
47    }
48}
49
50impl Display for DelsumError {
51    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
52        match self {
53            DelsumError::ModelError(e) => write!(f, "{}", e),
54            DelsumError::ChecksumCountMismatch(s) => write!(f, "{}", s),
55            DelsumError::WordsizeMisalignment => {
56                write!(
57                    f,
58                    "The checksummed region is not a multiple of the wordsize"
59                )
60            }
61        }
62    }
63}
64
65impl Error for DelsumError {
66    fn source(&self) -> Option<&(dyn Error + 'static)> {
67        match self {
68            DelsumError::ModelError(e) => Some(e),
69            DelsumError::ChecksumCountMismatch(_) | DelsumError::WordsizeMisalignment => None,
70        }
71    }
72}
73
74/// For figuring out what type of integer to use, we need to parse the width from the
75/// model string, but to parse the model string, we need to know the integer type,
76/// so it is done here separately.
77/// We also need the prefix to find out what algorithm to use
78fn find_prefix_width(s: &str) -> Result<(&str, usize, &str), CheckBuilderErr> {
79    let stripped = s.trim_start();
80    // it is done like this to ensure that no non-whitespace (blackspace?) is left at the end of the prefix
81    let pref = stripped.split_whitespace().next();
82    let (prefix, rest) = match PREFIXES.iter().find(|x| Some(**x) == pref) {
83        Some(p) => (*p, &stripped[p.len()..]),
84        None => return Err(CheckBuilderErr::MalformedString("algorithm".to_owned())),
85    };
86    for x in keyval::KeyValIter::new(rest) {
87        match x {
88            Err(k) => return Err(CheckBuilderErr::MalformedString(k)),
89            Ok((k, v)) => {
90                if &k == "width" {
91                    return v
92                        .parse()
93                        .map_err(|_| CheckBuilderErr::MalformedString(k))
94                        .map(|width| (prefix, width, rest));
95                }
96            }
97        }
98    }
99    Err(CheckBuilderErr::MissingParameter("width"))
100}
101
102// modifies the end_range so that there is
103// checksum_size bytes padding to the end
104// in all files
105fn cutoff_checksum_length(
106    end_range: SignedInclRange,
107    bytes: impl Iterator<Item = usize>,
108    checksum_size: usize,
109) -> Option<SignedInclRange> {
110    let end = end_range.end();
111    if end < 0 {
112        let new_end = (-1 - checksum_size as isize).min(end);
113        end_range.set_end(new_end)
114    } else {
115        let min_len = bytes.min()? as isize;
116        let new_end = (min_len - checksum_size as isize - 1).min(end);
117        end_range.set_end(new_end)
118    }
119}
120
121fn byte_width<L: Digest>(spec: &L) -> usize
122where
123    L::Sum: BitNum,
124{
125    spec.to_bytes(<L::Sum as Zero>::zero()).len()
126}
127
128#[derive(Clone, Copy)]
129pub enum SegmentChecksum<'a> {
130    FromEnd(usize),
131    Constant(&'a [Vec<u8>]),
132}
133
134impl<'a> SegmentChecksum<'a> {
135    // takes a bunch of files with SegmentChecksum and resolves the checksums
136    fn resolve<'b>(
137        &self,
138        width: usize,
139        bytes: &[&'b [u8]],
140    ) -> Result<Vec<(&'b [u8], Vec<u8>)>, Option<DelsumError>> {
141        match self {
142            SegmentChecksum::FromEnd(gap) => {
143                let width = width.div_ceil(8);
144                let Some(len) = cutoff_checksum_length(
145                    SignedInclRange::new(0, -1).unwrap(),
146                    bytes.iter().map(|x| x.len()),
147                    width + gap,
148                ) else {
149                    return Err(None);
150                };
151
152                let checksum_part = SignedInclRange::new(-(width as isize), -1).unwrap();
153                let Some(t) = bytes
154                    .iter()
155                    .map(|x| {
156                        let file_part = len.slice(x)?;
157                        let checksum_part = checksum_part.slice(x)?;
158                        Some((file_part, checksum_part.to_vec()))
159                    })
160                    .collect()
161                else {
162                    return Err(None);
163                };
164                Ok(t)
165            }
166            SegmentChecksum::Constant(checksums) => {
167                if let Some(err) = check_count_mismatch(bytes.len(), checksums.len()) {
168                    return Err(err.into());
169                }
170
171                Ok(bytes
172                    .iter()
173                    .copied()
174                    .zip(checksums.iter().cloned())
175                    .collect())
176            }
177        }
178    }
179}
180
181fn check_count_mismatch(bytes_len: usize, checksums_len: usize) -> Option<DelsumError> {
182    match checksums_len.cmp(&bytes_len) {
183        Ordering::Greater => {
184            return Some(DelsumError::ChecksumCountMismatch(
185                "not enough files for checksums given",
186            ));
187        }
188        Ordering::Less => {
189            return Some(DelsumError::ChecksumCountMismatch(
190                "not enough checksums for files given",
191            ));
192        }
193        Ordering::Equal => None,
194    }
195}
196
197/// A helper function for calling the find_segments function with strings arguments
198fn find_segment_str<L>(
199    spec: &str,
200    bytes: &[Vec<u8>],
201    sum: SegmentChecksum,
202    start_range: SignedInclRange,
203    end_range: SignedInclRange,
204) -> Result<Vec<RangePair>, DelsumError>
205where
206    L: LinearCheck + FromStr<Err = CheckBuilderErr>,
207    L::Sum: BitNum,
208{
209    let spec = Arc::new(L::from_str(spec)?);
210    match sum {
211        SegmentChecksum::Constant(sum_bytes) => {
212            if let Some(err) = check_count_mismatch(bytes.len(), sum_bytes.len()) {
213                return Err(err.into());
214            }
215            let sum_array: Vec<_> = sum_bytes
216                .iter()
217                .map(|x| const_sum(spec.checksum_from_bytes(x)))
218                .collect();
219            Ok(spec.find_segments_range(bytes, &sum_array, start_range, end_range))
220        }
221        SegmentChecksum::FromEnd(n) => {
222            let width = byte_width(&*spec);
223            let checksum_length = width + n;
224            let Some(end_range) =
225                cutoff_checksum_length(end_range, bytes.iter().map(|x| x.len()), checksum_length)
226            else {
227                return Ok(Vec::new());
228            };
229            let sum_array: Vec<_> = bytes
230                .iter()
231                .map(|_| {
232                    let spec = spec.clone();
233                    move |bytes: &[u8], addr: usize| {
234                        let start = addr + n;
235                        let end = addr + checksum_length;
236                        spec.checksum_from_bytes(&bytes[start..end])
237                    }
238                })
239                .collect();
240            Ok(spec.find_segments_range(bytes, &sum_array, start_range, end_range))
241        }
242    }
243}
244
245/// The available checksum types
246static PREFIXES: &[&str] = &["fletcher", "crc", "modsum", "polyhash"];
247
248/// A stringy function for determining which segments of a file have a given checksum.
249///
250/// It is given
251/// * a string that models a checksum algorithm
252/// * a vector of bytes slices (each slice containing the bytes of a file)
253/// * a comma-separated string (without whitespace) containing target checksums for each file
254/// * a parameter indicating whether the ends of the segments are relative to the start or the end of the file
255///
256/// # The Model String
257/// A model string is generally of the form
258/// ```text
259/// [algorithm] width=[number] {more parameters}
260/// ```
261/// The `algorithm` parameter is either `fletcher`, `crc` or `modsum`.
262/// Parameters depend solely on what kind of algorithm is used and more information is available
263/// at the respective Builders.
264pub fn find_checksum_segments(
265    strspec: &str,
266    bytes: &[Vec<u8>],
267    sum: SegmentChecksum,
268    start_range: SignedInclRange,
269    end_range: SignedInclRange,
270) -> Result<Vec<RangePair>, DelsumError> {
271    let (prefix, width, rest) = find_prefix_width(strspec)?;
272    match (width, prefix) {
273        (1..=32, "crc") => find_segment_str::<CRC<u32>>(rest, bytes, sum, start_range, end_range),
274        (33..=64, "crc") => find_segment_str::<CRC<u64>>(rest, bytes, sum, start_range, end_range),
275        (65..=128, "crc") => {
276            find_segment_str::<CRC<u128>>(rest, bytes, sum, start_range, end_range)
277        }
278        (1..=32, "modsum") => {
279            find_segment_str::<ModSum<u32>>(rest, bytes, sum, start_range, end_range)
280        }
281        (33..=64, "modsum") => {
282            find_segment_str::<ModSum<u64>>(rest, bytes, sum, start_range, end_range)
283        }
284        (1..=32, "fletcher") => {
285            find_segment_str::<Fletcher<u16>>(rest, bytes, sum, start_range, end_range)
286        }
287        (33..=64, "fletcher") => {
288            find_segment_str::<Fletcher<u32>>(rest, bytes, sum, start_range, end_range)
289        }
290        (65..=128, "fletcher") => {
291            find_segment_str::<Fletcher<u64>>(rest, bytes, sum, start_range, end_range)
292        }
293        (1..=32, "polyhash") => {
294            find_segment_str::<PolyHash<u32>>(rest, bytes, sum, start_range, end_range)
295        }
296        (33..=64, "polyhash") => {
297            find_segment_str::<PolyHash<u64>>(rest, bytes, sum, start_range, end_range)
298        }
299        _ => Err(CheckBuilderErr::ValueOutOfRange("width").into()),
300    }
301}
302
303fn get_checksums<A>(
304    strspec: &str,
305    files: &[&[u8]],
306    width: usize,
307) -> Result<Vec<Vec<u8>>, DelsumError>
308where
309    A: Digest + FromStr<Err = CheckBuilderErr>,
310    A::Sum: crate::bitnum::BitNum,
311{
312    let algo = A::from_str(strspec)?;
313    let mut sums = Vec::new();
314    for file in files {
315        if file.len() % algo.wordspec().word_bytes() != 0 {
316            return Err(DelsumError::WordsizeMisalignment);
317        }
318        sums.push(
319            algo.wordspec()
320                .output_to_bytes(algo.digest(file).unwrap(), width),
321        );
322    }
323    Ok(sums)
324}
325
326pub fn find_checksum(strspec: &str, bytes: &[&[u8]]) -> Result<Vec<Vec<u8>>, DelsumError> {
327    let (prefix, width, rest) = find_prefix_width(strspec)?;
328    match (width, prefix) {
329        (1..=64, "crc") => get_checksums::<CRC<u64>>(rest, bytes, width),
330        (65..=128, "crc") => get_checksums::<CRC<u128>>(rest, bytes, width),
331        (1..=64, "modsum") => get_checksums::<ModSum<u64>>(rest, bytes, width),
332        (2..=64, "polyhash") => get_checksums::<PolyHash<u64>>(rest, bytes, width),
333        (1..=128, "fletcher") => get_checksums::<Fletcher<u64>>(rest, bytes, width),
334        _ => Err(CheckBuilderErr::ValueOutOfRange("width").into()),
335    }
336}
337
338enum BuilderEnum {
339    Crc(CrcBuilder<u128>),
340    ModSum(ModSumBuilder<u64>),
341    Fletcher(FletcherBuilder<u64>),
342    PolyHash(PolyHashBuilder<u64>),
343}
344
345pub struct AlgorithmFinder<'a> {
346    pairs: Vec<(&'a [u8], Vec<u8>)>,
347    spec: BuilderEnum,
348    verbosity: u64,
349    extended_search: bool,
350}
351
352type ReverserFn<'a, T, I> = fn(&T, &[(&'a [u8], Vec<u8>)], u64, bool) -> I;
353
354impl<'a> AlgorithmFinder<'a> {
355    fn iter_solutions<T, S: ToString, E, I: Iterator<Item = Result<S, E>>>(
356        &self,
357        x: &T,
358        reverser: ReverserFn<'a, T, I>,
359    ) -> impl Iterator<Item = Result<String, E>> + use<T, S, E, I> {
360        reverser(x, &self.pairs, self.verbosity, self.extended_search)
361            .map(|x| x.map(|y| y.to_string()))
362    }
363
364    #[cfg(feature = "parallel")]
365    fn par_iter_solutions<T, S, E: Send + Sync, I: ParallelIterator<Item = Result<S, E>>>(
366        &self,
367        x: &T,
368        reverser: ReverserFn<'a, T, I>,
369    ) -> impl ParallelIterator<Item = Result<String, E>> + use<T, S, E, I>
370    where
371        S: ToString,
372    {
373        reverser(x, &self.pairs, self.verbosity, self.extended_search)
374            .map(|x| x.map(|y| y.to_string()))
375    }
376
377    pub fn find_all(&self) -> impl Iterator<Item = Result<String, CheckReverserError>> + use<'a> {
378        let maybe_crc = if let BuilderEnum::Crc(crc) = &self.spec {
379            Some(self.iter_solutions(crc, reverse_crc))
380        } else {
381            None
382        };
383        let maybe_modsum = if let BuilderEnum::ModSum(modsum) = &self.spec {
384            Some(self.iter_solutions(modsum, reverse_modsum))
385        } else {
386            None
387        };
388        let maybe_fletcher = if let BuilderEnum::Fletcher(fletcher) = &self.spec {
389            Some(self.iter_solutions(fletcher, reverse_fletcher))
390        } else {
391            None
392        };
393        let maybe_polyhash = if let BuilderEnum::PolyHash(polyhash) = &self.spec {
394            Some(self.iter_solutions(polyhash, reverse_polyhash))
395        } else {
396            None
397        };
398
399        maybe_crc
400            .into_iter()
401            .flatten()
402            .chain(maybe_modsum.into_iter().flatten())
403            .chain(maybe_fletcher.into_iter().flatten())
404            .chain(maybe_polyhash.into_iter().flatten())
405    }
406
407    #[cfg(feature = "parallel")]
408    pub fn find_all_para(
409        &self,
410    ) -> impl ParallelIterator<Item = Result<String, CheckReverserError>> + use<'a> {
411        let maybe_crc = if let BuilderEnum::Crc(crc) = &self.spec {
412            Some(self.par_iter_solutions(crc, reverse_crc_para))
413        } else {
414            None
415        };
416        let maybe_modsum = if let BuilderEnum::ModSum(modsum) = &self.spec {
417            Some(self.iter_solutions(modsum, reverse_modsum).par_bridge())
418        } else {
419            None
420        };
421        let maybe_fletcher = if let BuilderEnum::Fletcher(fletcher) = &self.spec {
422            Some(self.par_iter_solutions(fletcher, reverse_fletcher_para))
423        } else {
424            None
425        };
426        let maybe_polyhash = if let BuilderEnum::PolyHash(polyhash) = &self.spec {
427            Some(self.par_iter_solutions(polyhash, reverse_polyhash_para))
428        } else {
429            None
430        };
431
432        maybe_crc
433            .into_par_iter()
434            .flatten()
435            .chain(maybe_modsum.into_par_iter().flatten())
436            .chain(maybe_fletcher.into_par_iter().flatten())
437            .chain(maybe_polyhash.into_par_iter().flatten())
438    }
439}
440
441pub fn find_algorithm<'a>(
442    strspec: &str,
443    bytes: &[&'a [u8]],
444    sums: SegmentChecksum,
445    verbosity: u64,
446    extended_search: bool,
447) -> Result<AlgorithmFinder<'a>, DelsumError> {
448    let (prefix, width, rest) = find_prefix_width(strspec)?;
449    let prefix = prefix.to_ascii_lowercase();
450    let spec = match prefix.as_str() {
451        "crc" => BuilderEnum::Crc(CrcBuilder::<u128>::from_str(rest)?),
452        "modsum" => BuilderEnum::ModSum(ModSumBuilder::<u64>::from_str(rest)?),
453        "fletcher" => BuilderEnum::Fletcher(FletcherBuilder::<u64>::from_str(rest)?),
454        "polyhash" => BuilderEnum::PolyHash(PolyHashBuilder::<u64>::from_str(rest)?),
455        _ => unimplemented!(),
456    };
457    let pairs = match sums.resolve(width, bytes) {
458        Ok(p) => p,
459        Err(None) => todo!(),
460        Err(Some(e)) => {
461            return Err(e);
462        }
463    };
464    Ok(AlgorithmFinder {
465        pairs,
466        spec,
467        verbosity,
468        extended_search,
469    })
470}
471
472#[cfg(test)]
473mod tests {
474
475    use super::*;
476    #[test]
477    fn multibyte_part_range() {
478        assert_eq!(
479            find_checksum_segments(
480                "modsum width=16 wordsize=24 modulus=0x0",
481                &[vec![0u8; 15]],
482                SegmentChecksum::Constant(&[vec![0, 0]]),
483                SignedInclRange::new(0, 5).unwrap(),
484                SignedInclRange::new(-5, -2).unwrap()
485            ),
486            Ok(vec![
487                (vec![0, 3], vec![-4]),
488                (vec![1, 4], vec![-3]),
489                (vec![2, 5], vec![-5, -2])
490            ])
491        );
492        assert_eq!(
493            find_checksum_segments(
494                "modsum width=16 wordsize=16 modulus=0x0 wordsize=16",
495                &[vec![0u8; 15], vec![0u8; 12], vec![0u8; 9]],
496                SegmentChecksum::Constant(&[vec![0, 0], vec![0, 0], vec![0, 0]]),
497                SignedInclRange::new(0, 8).unwrap(),
498                SignedInclRange::new(-9, -1).unwrap(),
499            ),
500            Ok(vec![])
501        );
502        assert_eq!(
503            find_checksum_segments(
504                "modsum width=16 wordsize=16 modulus=0x0 wordsize=24",
505                &[vec![0u8; 15], vec![0u8; 12], vec![0u8; 9]],
506                SegmentChecksum::Constant(&[vec![0, 0], vec![0, 0], vec![0, 0]]),
507                SignedInclRange::new(0, 8).unwrap(),
508                SignedInclRange::new(-9, -1).unwrap(),
509            ),
510            Ok(vec![
511                (vec![0, 3, 6], vec![-7, -4, -1]),
512                (vec![1, 4], vec![-6, -3]),
513                (vec![2, 5], vec![-5, -2]),
514            ])
515        );
516        assert_eq!(
517            find_checksum_segments(
518                "crc width=16 poly=0x1 wordsize=16 in_endian=little out_endian=little",
519                &[
520                    vec![0x6d, 0x79, 0x72, 0x3f, 0x00, 0x5d],
521                    vec![0x75, 0x2d, 0xf4, 0xd4, 0xf5, 0xcf, 0xd8, 0x35]
522                ],
523                SegmentChecksum::Constant(&[vec![0x72, 0x3f], vec![0x01, 0x1b]]),
524                SignedInclRange::new(0, 5).unwrap(),
525                SignedInclRange::new(-7, -1).unwrap(),
526            ),
527            Ok(vec![(vec![2], vec![-3])])
528        );
529    }
530    #[test]
531    fn png_checksums() {
532        let png = vec![
533            0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00, 0x00, 0x0d, 0x49, 0x48,
534            0x44, 0x52, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00,
535            0x00, 0x37, 0x6e, 0xf9, 0x24, 0x00, 0x00, 0x00, 0x0a, 0x49, 0x44, 0x41, 0x54, 0x78,
536            0x01, 0x63, 0x60, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0x73, 0x75, 0x01, 0x18, 0x00,
537            0x00, 0x00, 0x00, 0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82,
538        ];
539        assert_eq!(
540            find_checksum_segments(
541                "crc width=32 poly=0x04c11db7 init=0xffffffff refin=true refout=true xorout=0xffffffff out_endian=big",
542                &[png.clone()],
543                SegmentChecksum::FromEnd(0),
544                SignedInclRange::new(0, png.len() as _).unwrap(),
545                SignedInclRange::new(0, png.len() as _).unwrap(),
546            ),
547            Ok(vec![
548                (vec![12], vec![28]),
549                (vec![37], vec![50]),
550                (vec![59], vec![62])
551            ])
552        );
553    }
554}