Skip to main content

socket_patch_core/patch/
diff.rs

//! Per-file diff (bsdiff) apply support.
//!
//! A `diff` is a binary delta in bsdiff 4.x format that transforms the
//! `beforeHash` bytes of a file into the `afterHash` bytes. We store diffs
//! grouped by patch UUID — see [`crate::patch::package`] for the tar.gz
//! archive layout.
7
8use qbsdiff::Bspatch;
9
/// Ceiling on the capacity reserved up front for the patched output buffer.
///
/// The value returned by `Bspatch::hint_target_size()` is copied verbatim
/// from the bsdiff header (bytes 24..32). qbsdiff's parser checks the control
/// and delta block lengths against the real payload, but it never checks this
/// field, so a corrupt or hostile delta can advertise any target size (up to
/// `i64::MAX`) while containing only a handful of bytes.
///
/// Passing that number directly to `Vec::with_capacity` would let a tiny
/// delta ask for a multi-exabyte reservation. That ends in either a
/// "capacity overflow" panic or an allocator abort; the caller can recover
/// from neither, which violates the never-panic-on-bad-input contract the
/// patch engine relies on (see the tests below).
///
/// Reserving capacity is only an optimization. `apply` follows the control
/// stream and the output `Vec` grows as bytes are written, so capping the
/// hint cannot alter the produced bytes — it merely limits how many
/// reallocations a legitimately large file incurs.
const MAX_PREALLOC_BYTES: u64 = 64 << 20; // 64 MiB
29
30/// Apply a bsdiff delta to `before` and return the resulting bytes.
31///
32/// Returns an `std::io::Error` when the delta is malformed or applying it
33/// fails (for example, the delta was produced from a different source).
34pub fn apply_diff(before: &[u8], delta: &[u8]) -> Result<Vec<u8>, std::io::Error> {
35    let patcher = Bspatch::new(delta)?;
36    // Clamp the attacker-controlled size hint: a corrupt/hostile header must
37    // not be able to turn a small delta into a process-killing allocation.
38    let prealloc = patcher.hint_target_size().min(MAX_PREALLOC_BYTES) as usize;
39    let mut out = Vec::with_capacity(prealloc);
40    patcher.apply(before, std::io::Cursor::new(&mut out))?;
41    Ok(out)
42}
43
#[cfg(test)]
mod tests {
    use super::*;
    use qbsdiff::Bsdiff;

    /// Encode a bsdiff delta that transforms `before` into `after`.
    fn make_delta(before: &[u8], after: &[u8]) -> Vec<u8> {
        let mut encoded = Vec::new();
        let sink = std::io::Cursor::new(&mut encoded);
        Bsdiff::new(before, after).compare(sink).expect("compare");
        encoded
    }

    #[test]
    fn test_apply_diff_text_round_trip() {
        let before = b"the quick brown fox jumps over the lazy dog";
        let after = b"the quick brown cat jumps over the lazy dog";
        let patched = apply_diff(before, &make_delta(before, after)).unwrap();
        assert_eq!(patched, after);
    }

    #[test]
    fn test_apply_diff_binary_round_trip() {
        let before: Vec<u8> = (0..1024u32).map(|i| (i % 251) as u8).collect();
        // Perturb a few bytes spread across the buffer.
        let mut after = before.clone();
        for pos in [10usize, 200, 500, 900] {
            let byte = &mut after[pos];
            *byte = byte.wrapping_add(7);
        }
        let patched = apply_diff(&before, &make_delta(&before, &after)).unwrap();
        assert_eq!(patched, after);
    }

    #[test]
    fn test_apply_diff_empty_to_nonempty() {
        let before: &[u8] = b"";
        let after = b"hello";
        let patched = apply_diff(before, &make_delta(before, after)).unwrap();
        assert_eq!(patched, after);
    }

    #[test]
    fn test_apply_diff_malformed_errors() {
        // Arbitrary text is extremely unlikely to parse as a bsdiff header.
        let bogus_delta = b"not a real bsdiff delta";
        let outcome = apply_diff(b"anything", bogus_delta);
        assert!(outcome.is_err(), "expected malformed-delta error");
    }

    #[test]
    fn test_apply_diff_wrong_source_does_not_panic() {
        // Encode a delta against one source, then apply it to another.
        // qbsdiff's bspatch is content-agnostic but should still produce
        // *some* output without panicking; verifying that the result hash
        // matches the expected `after_hash` is the caller's job. The point
        // of this test is to lock in the never-panic-on-bad-input contract
        // that callers depend on.
        let src_a = b"AAAAAAAAAAAAAAAAAAAA";
        let src_b = b"BBBBBBBBBBBBBBBBBBBB";
        let target = b"CCCCCCCCCCCCCCCCCCCC";
        let delta = make_delta(src_a, target);
        // The output may or may not equal `target`; only "no panic" matters.
        let _ = apply_diff(src_b, &delta);
    }

    #[test]
    fn test_apply_diff_forged_oversize_header_is_safe() {
        // Regression guard. `apply_diff` once passed `hint_target_size()`
        // directly to `Vec::with_capacity`. That value is the bsdiff header's
        // target size (little-endian bytes 24..32), which qbsdiff does NOT
        // validate against the real payload, so a corrupt or hostile delta
        // can claim an enormous size. A multi-exabyte `with_capacity` either
        // aborts the process (allocator failure) or panics with "capacity
        // overflow"; neither is recoverable, so one bad patch could take the
        // whole tool down.
        //
        // Here a genuine small delta is built and only its target-size field
        // is overwritten with ~1.15 EiB. Since `apply` follows the control
        // stream and ignores the hint, the clamp lets the patch still yield
        // the correct bytes instead of dying on the allocation.
        let before = b"the quick brown fox jumps over the lazy dog";
        let after = b"the quick brown cat jumps over the lazy dog";
        let mut forged = make_delta(before, after);
        assert!(forged.len() >= 32, "delta must contain a full header");

        // Keep the top bit clear so qbsdiff reads the field as a large
        // unsigned size rather than as a negative offset.
        let oversized = 1u64 << 60;
        forged[24..32].copy_from_slice(&oversized.to_le_bytes());

        let patched = apply_diff(before, &forged).expect("clamped apply must succeed");
        assert_eq!(
            patched, after,
            "forging the size hint must not corrupt output"
        );
    }

    #[test]
    fn test_apply_diff_capacity_hint_is_clamped() {
        // Pin the clamp expression so the bound cannot quietly regress to an
        // unbounded reservation: whatever the header claims, the reserved
        // capacity never exceeds MAX_PREALLOC_BYTES.
        let clamp = |hint: u64| hint.min(MAX_PREALLOC_BYTES) as usize;

        assert_eq!(clamp(u64::MAX), MAX_PREALLOC_BYTES as usize);
        // A small, honest hint is passed through unchanged.
        assert_eq!(clamp(4096), 4096);
    }
}