Skip to main content

delsum_lib/modsum/
rev.rs

1//! This modulus contains the function(s) for reversing the parameters for a modular sum.
2//!
3//! Generally, to find out the parameters, the checksums and their width are needed, and 2 of the following (with at least one file):
4//! * value of `init`
5//! * value of `modulus`
6//! * a file with checksum
7//! * a different file with checksum
8//!
9//! Of course, giving more files will result in fewer false positives.
10use super::{ModSum, ModSumBuilder};
11use crate::checksum::{CheckReverserError, Checksum, filter_opt_err};
12use crate::divisors::{divisors_range, gcd};
13use crate::endian::{SignedInt, WordSpec, wordspec_combos};
14use crate::utils::{cart_prod, unresult_iter};
15use std::iter::Iterator;
16/// Find the parameters of a modsum algorithm.
17///
18/// `spec` contains the known parameters of the algorithm (by setting the corresponding values in the builder).
19/// `chk_bytes` are pairs of files and their checksums.
20/// `verbosity` makes the function output what it is doing
21///
22/// The `width` parameter of the builder has to be set.
23pub fn reverse_modsum<'a>(
24    spec: &ModSumBuilder<u64>,
25    chk_bytes: &[(&'a [u8], Vec<u8>)],
26    verbosity: u64,
27    extended_search: bool,
28) -> impl Iterator<Item = Result<ModSum<u64>, CheckReverserError>> + use<'a> {
29    let chk_bytes = chk_bytes.to_vec();
30    let spec = spec.clone();
31    discrete_combos(&spec, extended_search)
32        .into_iter()
33        .flat_map(move |(wordspec, negated)| {
34            let rev = match spec.width {
35                None => Err(Some(CheckReverserError::MissingParameter("width"))),
36                Some(width) => {
37                    if let Some(chk_words) = chk_bytes
38                        .iter()
39                        .map(|(f, c)| {
40                            (f.len() % wordspec.word_bytes() == 0)
41                                .then_some((wordspec.iter_words(f), c.clone()))
42                        })
43                        .collect::<Option<Vec<_>>>()
44                    {
45                        let revspec = RevSpec {
46                            width,
47                            init: spec.init,
48                            modulus: spec.modulus,
49                            wordspec,
50                            negated,
51                        };
52                        reverse(revspec, chk_words, verbosity).map(|x| x.iter())
53                    } else {
54                        Err(None)
55                    }
56                }
57            };
58            filter_opt_err(unresult_iter(rev))
59        })
60}
61
62fn discrete_combos(spec: &ModSumBuilder<u64>, extended_search: bool) -> Vec<(WordSpec, bool)> {
63    let wordspec_combos = wordspec_combos(
64        spec.wordsize,
65        spec.input_endian,
66        spec.output_endian,
67        spec.signedness,
68        spec.width.unwrap(),
69        extended_search,
70    );
71    let negateds = if let Some(negated) = spec.negated {
72        vec![negated]
73    } else {
74        vec![false, true]
75    };
76    cart_prod(&wordspec_combos, &negateds)
77}
78
79struct RevSpec {
80    width: usize,
81    init: Option<u64>,
82    modulus: Option<u64>,
83    wordspec: WordSpec,
84    negated: bool,
85}
86
87struct RevResult {
88    modlist: Vec<u128>,
89    init: i128,
90    width: usize,
91    wordspec: WordSpec,
92    negated: bool,
93}
94
95impl RevResult {
96    // iterate over all possible moduli and calculate the corresponding init values
97    fn iter(self) -> impl Iterator<Item = ModSum<u64>> {
98        let Self {
99            modlist,
100            init,
101            width,
102            wordspec,
103            negated,
104        } = self;
105        modlist.into_iter().map(move |modulus| {
106            let init_negative = init < 0;
107            let mut init = init.unsigned_abs() % modulus;
108            if init_negative {
109                init = modulus - init;
110            }
111            ModSum::with_options()
112                .width(width)
113                .modulus(modulus as u64)
114                .init(init as u64)
115                .inendian(wordspec.input_endian)
116                .outendian(wordspec.output_endian)
117                .wordsize(wordspec.wordsize)
118                .signedness(wordspec.signedness)
119                .negated(negated)
120                .build()
121                .unwrap()
122        })
123    }
124}
125
126// If we have a file with the bytes [a, b, c, d] we have a checksum of the form (init + a + b + c + d) mod m.
127// By subtracting a + b + c + d from the checksum (without mod'ing by m because we don't know m yet), we get
128// init mod m.
129// If we have two files, we can take their difference and have a number that is 0 mod m, which means m divides this number.
130// The solutions are then the divisors m in the appropiate range.
131fn reverse(
132    spec: RevSpec,
133    chk_bytes: Vec<(impl Iterator<Item = SignedInt<u64>>, Vec<u8>)>,
134    verbosity: u64,
135) -> Result<RevResult, Option<CheckReverserError>> {
136    let log = |s| {
137        if verbosity > 0 {
138            eprintln!("<modsum> {}", s);
139        }
140    };
141    let width = spec.width;
142    let mut sums = Vec::<i128>::new();
143    let max_sum = 1u128 << width;
144    let mut min_sum = 0;
145    log("summing files up");
146    for (f, chk) in chk_bytes {
147        let Some(chk) = Checksum::from_bytes(&chk, spec.wordspec.output_endian, width) else {
148            return Err(None);
149        };
150        min_sum = min_sum.max(chk);
151        // here we calculate (init mod m)
152        let mut sum = -(chk as i128);
153        if spec.negated {
154            sum = -sum;
155        }
156
157        for word in f {
158            if word.negative {
159                sum -= word.value as i128;
160            } else {
161                sum += word.value as i128;
162            }
163        }
164        sums.push(sum);
165    }
166    let original_mod = spec
167        .modulus
168        .map(|x| if x == 0 { 1u128 << width } else { x as u128 });
169    let mut modulus = original_mod.unwrap_or(0);
170    log("removing inits");
171    // here we find modulus by gcd'ing between the differences (init - init == 0 mod m)
172    let init = find_largest_mod(&sums, spec.init.map(i128::from), &mut modulus);
173    if modulus == 0 {
174        return Err(Some(CheckReverserError::UnsuitableFiles(
175            "too short or too similar",
176        )));
177    }
178    log("finding all possible factors");
179    // find all possible divisors
180    let modlist = match original_mod {
181        Some(x) => {
182            if x == modulus && x > min_sum && x <= max_sum {
183                vec![modulus]
184            } else {
185                Vec::new()
186            }
187        }
188        None => divisors_range(modulus, min_sum + 1, max_sum),
189    };
190    Ok(RevResult {
191        modlist,
192        init,
193        width,
194        wordspec: spec.wordspec,
195        negated: spec.negated,
196    })
197}
198
199pub(crate) fn find_largest_mod(
200    sums: &[i128],
201    maybe_init: Option<i128>,
202    modulus: &mut u128,
203) -> i128 {
204    match maybe_init {
205        Some(i) => {
206            // if we already have init, we can just subtract that from the sum and get a multiple of m
207            for s in sums {
208                *modulus = gcd(*modulus, (s + i).unsigned_abs());
209            }
210            i
211        }
212        None => {
213            // otherwise their difference will do, but we do get one gcd less
214            for (s1, s2) in sums.iter().zip(sums.iter().skip(1)) {
215                *modulus = gcd(*modulus, (s1 - s2).unsigned_abs());
216            }
217            -sums[0]
218        }
219    }
220}
221
222#[cfg(test)]
223mod tests {
224    use super::*;
225    use crate::checksum::tests::ReverseFileSet;
226    use crate::endian::Endian;
227    use quickcheck::{Arbitrary, Gen, TestResult};
228    impl Arbitrary for ModSumBuilder<u64> {
229        fn arbitrary(g: &mut Gen) -> Self {
230            let mut new_modsum = ModSum::with_options();
231            let width = u8::arbitrary(g) % 64 + 1;
232            new_modsum.width(width as usize);
233            let modulus = if width < 64 {
234                u64::arbitrary(g) % (1 << width)
235            } else {
236                u64::arbitrary(g)
237            };
238            new_modsum.modulus(modulus);
239            let init = if modulus != 0 {
240                u64::arbitrary(g) % modulus
241            } else {
242                u64::arbitrary(g)
243            };
244            new_modsum.init(init);
245            let wordspec = WordSpec::arbitrary(g);
246            let max_word_width = ((width as usize + 7) / 8).next_power_of_two() * 8;
247            let actual_wordsize = max_word_width.min(wordspec.wordsize);
248            new_modsum.wordsize(actual_wordsize);
249            new_modsum.inendian(if actual_wordsize == 8 {
250                Endian::Big
251            } else {
252                wordspec.input_endian
253            });
254            new_modsum.outendian(wordspec.output_endian);
255            new_modsum.signedness(wordspec.signedness);
256            new_modsum.negated(bool::arbitrary(g));
257            new_modsum
258        }
259    }
260
261    fn run_modsum_rev(
262        files: ReverseFileSet,
263        modsum_build: ModSumBuilder<u64>,
264        known: (bool, bool, bool),
265        wordspec_known: (bool, bool, bool, bool),
266    ) -> TestResult {
267        let modsum = modsum_build.build().unwrap();
268        let mut naive = ModSum::<u64>::with_options();
269        naive.width(modsum_build.width.unwrap());
270        if known.0 {
271            naive.modulus(modsum_build.modulus.unwrap());
272        }
273        if known.1 {
274            naive.init(modsum_build.init.unwrap());
275        }
276        if known.2 {
277            naive.negated(modsum_build.negated.unwrap());
278        }
279        if wordspec_known.0 {
280            naive.wordsize(modsum_build.wordsize.unwrap());
281        }
282        if wordspec_known.1 {
283            naive.inendian(modsum_build.input_endian.unwrap());
284        }
285        if wordspec_known.2 {
286            naive.outendian(modsum_build.output_endian.unwrap());
287        }
288        if wordspec_known.3 {
289            naive.signedness(modsum_build.signedness.unwrap());
290        }
291        let chk_files: Vec<_> = files.with_checksums(&modsum);
292        let reverser = reverse_modsum(&naive, &chk_files, 0, false);
293        files.check_matching(&modsum, reverser)
294    }
295    #[quickcheck]
296    fn qc_modsum_rev(
297        files: ReverseFileSet,
298        modsum_build: ModSumBuilder<u64>,
299        known: (bool, bool, bool),
300        wordspec_known: (bool, bool, bool, bool),
301    ) -> TestResult {
302        run_modsum_rev(files, modsum_build, known, wordspec_known)
303    }
304    #[test]
305    fn error1() {
306        let modsum = ModSum::with_options()
307            .width(38)
308            .modulus(10)
309            .init(1)
310            .build()
311            .unwrap();
312        let f = ReverseFileSet(vec![
313            vec![],
314            vec![0],
315            vec![30, 98, 74, 46, 90, 70, 18, 37, 44, 53, 53, 20, 47, 39],
316        ]);
317        let chk_files: Vec<_> = f.with_checksums(&modsum);
318        let mut naive = ModSum::<u64>::with_options();
319        naive.width(38);
320        let m: Result<Vec<_>, _> = reverse_modsum(&naive, &chk_files, 0, false).collect();
321        if let Ok(x) = m {
322            assert!(x.contains(&modsum))
323        }
324    }
325    #[test]
326    fn error2() {
327        let modsum = ModSum::with_options()
328            .width(38)
329            .modulus(40)
330            .init(2)
331            .build()
332            .unwrap();
333        let f = ReverseFileSet(vec![
334            vec![61, 25, 35, 56, 90, 96, 75],
335            vec![8, 94, 62, 74],
336            vec![82, 11, 99, 46],
337        ]);
338        let chk_files: Vec<_> = f.with_checksums(&modsum);
339        let mut naive = ModSum::<u64>::with_options();
340        naive.width(38);
341        let m: Result<Vec<_>, _> = reverse_modsum(&naive, &chk_files, 0, false).collect();
342        if let Ok(x) = m {
343            assert!(x.contains(&modsum))
344        }
345    }
346    #[test]
347    fn error_preset_modulus() {
348        let modsum = ModSum::with_options()
349            .width(45)
350            .modulus(75u64)
351            .init(38)
352            .inendian(Endian::Big)
353            .outendian(Endian::Big)
354            .wordsize(64)
355            .build()
356            .unwrap();
357        let f = ReverseFileSet(vec![
358            vec![
359                33, 34, 85, 19, 55, 8, 87, 34, 35, 87, 39, 6, 16, 86, 80, 55, 69, 46, 64, 64, 14,
360                17, 25, 59,
361            ],
362            vec![93, 77, 50, 93, 18, 85, 12, 23, 32, 3, 7, 76, 90, 45, 70, 65],
363            vec![93, 26, 87, 27, 97, 36, 78, 48],
364        ]);
365        let chk_files: Vec<_> = f.with_checksums(&modsum);
366        let mut naive = ModSum::<u64>::with_options();
367        naive.width(45).modulus(75);
368        let m = reverse_modsum(&naive, &chk_files, 0, false);
369        assert!(!f.check_matching(&modsum, m).is_failure())
370    }
371    #[test]
372    fn error3() {
373        // caused by bug in factoring algorithm
374        let modsum = ModSum::with_options()
375            .width(34)
376            .modulus(0x15758e195u64)
377            .init(0xd31ee539)
378            .inendian(Endian::Big)
379            .outendian(Endian::Little)
380            .wordsize(32)
381            .build()
382            .unwrap();
383        let f = ReverseFileSet(vec![
384            vec![
385                13, 172, 74, 40, 206, 163, 7, 169, 20, 194, 253, 171, 168, 190, 255, 187, 150, 56,
386                44, 212, 115, 70, 66, 86, 97, 111, 139, 202, 115, 189, 255, 117, 112, 225, 215,
387                168, 211, 64, 1, 26, 127, 1, 71, 249, 71, 212, 144, 47, 253, 140, 57, 42, 232, 170,
388                62, 240,
389            ],
390            vec![
391                122, 13, 224, 25, 74, 129, 163, 253, 0, 233, 255, 250, 216, 209, 105, 175, 148, 98,
392                154, 210, 9, 216, 253, 18, 0, 56, 26, 85, 104, 61, 0, 19, 156, 103, 255, 6, 122,
393                230, 106, 5,
394            ],
395            vec![
396                187, 202, 75, 213, 99, 51, 90, 0, 219, 82, 79, 0, 144, 98, 34, 80, 90, 1, 189, 10,
397                137, 199, 176, 174,
398            ],
399        ]);
400        let chk_files: Vec<_> = f.with_checksums(&modsum);
401        let mut naive = ModSum::<u64>::with_options();
402        naive.width(34);
403        let m = reverse_modsum(&naive, &chk_files, 0, false);
404        assert!(!f.check_matching(&modsum, m).is_failure())
405    }
406}