socket_patch_core/patch/diff.rs
1//! Per-file diff (bsdiff) apply support.
2//!
3//! A `diff` is a binary delta in bsdiff 4.x format that transforms the
4//! `beforeHash` bytes of a file into the `afterHash` bytes. We store diffs
5//! grouped by patch UUID — see [`crate::patch::package`] for the tar.gz
6//! archive layout.
7
8use qbsdiff::Bspatch;
9
/// Ceiling on the capacity reserved up front for the patched output.
///
/// The value returned by `Bspatch::hint_target_size()` is the target size
/// copied as-is from the bsdiff header (bytes 24..32). While qbsdiff checks
/// the control and delta block lengths against the payload it actually
/// received, it does not check this field, so a corrupt or malicious delta
/// may advertise any target size (as large as `i64::MAX`) despite holding
/// only a handful of bytes.
///
/// Passing such a value directly to `Vec::with_capacity` would let a tiny
/// delta ask for a multi-exabyte reservation. The outcome is either a
/// "capacity overflow" panic or an allocator-triggered process abort; callers
/// cannot recover from either, which violates the never-panic-on-bad-input
/// contract the patch engine relies on (see the tests below).
///
/// Reserving is purely an optimization: `apply` follows the control stream
/// and extends the output `Vec` as it writes, so capping the hint cannot
/// alter the result. It only bounds how many reallocations a legitimately
/// large file incurs.
const MAX_PREALLOC_BYTES: u64 = 64 << 20; // 64 MiB
29
30/// Apply a bsdiff delta to `before` and return the resulting bytes.
31///
32/// Returns an `std::io::Error` when the delta is malformed or applying it
33/// fails (for example, the delta was produced from a different source).
34pub fn apply_diff(before: &[u8], delta: &[u8]) -> Result<Vec<u8>, std::io::Error> {
35 let patcher = Bspatch::new(delta)?;
36 // Clamp the attacker-controlled size hint: a corrupt/hostile header must
37 // not be able to turn a small delta into a process-killing allocation.
38 let prealloc = patcher.hint_target_size().min(MAX_PREALLOC_BYTES) as usize;
39 let mut out = Vec::with_capacity(prealloc);
40 patcher.apply(before, std::io::Cursor::new(&mut out))?;
41 Ok(out)
42}
43
#[cfg(test)]
mod tests {
    use super::*;
    use qbsdiff::Bsdiff;

    /// Produce a bsdiff delta that turns `before` into `after`.
    fn make_delta(before: &[u8], after: &[u8]) -> Vec<u8> {
        let mut encoded = Vec::new();
        let sink = std::io::Cursor::new(&mut encoded);
        Bsdiff::new(before, after).compare(sink).expect("compare");
        encoded
    }

    #[test]
    fn test_apply_diff_text_round_trip() {
        let before = b"the quick brown fox jumps over the lazy dog";
        let after = b"the quick brown cat jumps over the lazy dog";
        let patched = apply_diff(before, &make_delta(before, after)).unwrap();
        assert_eq!(patched, after);
    }

    #[test]
    fn test_apply_diff_binary_round_trip() {
        let before: Vec<u8> = (0..1024u32).map(|n| (n % 251) as u8).collect();
        // Perturb a few bytes spread across the buffer.
        let mut after = before.clone();
        for &pos in [10usize, 200, 500, 900].iter() {
            after[pos] = after[pos].wrapping_add(7);
        }
        let patched = apply_diff(&before, &make_delta(&before, &after)).unwrap();
        assert_eq!(patched, after);
    }

    #[test]
    fn test_apply_diff_empty_to_nonempty() {
        let before: &[u8] = b"";
        let after = b"hello";
        let patched = apply_diff(before, &make_delta(before, after)).unwrap();
        assert_eq!(patched, after);
    }

    #[test]
    fn test_apply_diff_malformed_errors() {
        // Arbitrary bytes are extremely unlikely to form a valid bsdiff header.
        let bogus_delta = b"not a real bsdiff delta";
        let outcome = apply_diff(b"anything", bogus_delta);
        assert!(outcome.is_err(), "expected malformed-delta error");
    }

    #[test]
    fn test_apply_diff_wrong_source_does_not_panic() {
        // A delta is built from one source and then applied to another.
        // qbsdiff's bspatch is content-agnostic but should still yield *some*
        // output without panicking; checking that the result hash matches the
        // expected `after_hash` is the caller's job. This test pins the
        // never-panic-on-bad-input contract that callers rely on.
        let src_a = b"AAAAAAAAAAAAAAAAAAAA";
        let src_b = b"BBBBBBBBBBBBBBBBBBBB";
        let target = b"CCCCCCCCCCCCCCCCCCCC";
        let delta = make_delta(src_a, target);
        // The output may or may not equal `target`; only the absence of a
        // panic matters here.
        let _ = apply_diff(src_b, &delta);
    }

    #[test]
    fn test_apply_diff_forged_oversize_header_is_safe() {
        // Regression: `apply_diff` once passed `hint_target_size()` directly
        // to `Vec::with_capacity`. That value is the bsdiff header's target
        // size (little-endian bytes 24..32), which qbsdiff does NOT validate
        // against the real payload, so a corrupt or hostile delta can claim
        // an enormous size. A multi-exabyte `with_capacity` either aborts the
        // process (allocator failure) or panics with "capacity overflow";
        // neither is recoverable, so one bad patch could take the tool down.
        //
        // Start from a genuine, small delta and overwrite only the target
        // size field with 2^60 bytes (1 EiB). Since `apply` follows the
        // control stream and ignores the hint, the clamp lets the patch still
        // yield the correct bytes rather than dying on the allocation.
        let before = b"the quick brown fox jumps over the lazy dog";
        let after = b"the quick brown cat jumps over the lazy dog";
        let mut forged = make_delta(before, after);
        assert!(forged.len() >= 32, "delta must contain a full header");
        // Keep the top bit clear so qbsdiff decodes the field as a large
        // unsigned size rather than a negative offset.
        let claimed_size = 1u64 << 60;
        forged[24..32].copy_from_slice(&claimed_size.to_le_bytes());

        let patched = apply_diff(before, &forged).expect("clamped apply must succeed");
        assert_eq!(
            patched, after,
            "forging the size hint must not corrupt output"
        );
    }

    #[test]
    fn test_apply_diff_capacity_hint_is_clamped() {
        // Pin the clamp arithmetic so the bound cannot quietly regress to an
        // unbounded reservation: whatever the header claims, the reserved
        // capacity never exceeds MAX_PREALLOC_BYTES, while a modest, honest
        // hint passes through unchanged.
        let cases: [(u64, usize); 2] = [
            (u64::MAX, MAX_PREALLOC_BYTES as usize),
            (4096, 4096),
        ];
        for &(hint, expected) in cases.iter() {
            assert_eq!(hint.min(MAX_PREALLOC_BYTES) as usize, expected);
        }
    }
}
154}