// bsdiff_android/bsdf2.rs

1// bsdf2.rs - Android BSDF2 format
2
3use std::io::{self, Read};
4
// Magic prefix of a classic bsdiff patch ("BSDIFF40").
const BSDIFF_MAGIC: &[u8; 8] = b"BSDIFF40";
// Magic prefix of the Android BSDF2 variant; the three bytes following
// it select the compression algorithm of each of the three streams.
const BSDF2_MAGIC: &[u8; 5] = b"BSDF2";

// Safety limits to prevent OOM when parsing hostile/corrupt patches.
// const MAX_PATCH_SIZE: usize = 512 * 1024 * 1024; // 512 MB
// Upper bound on the declared output size; checked before allocation.
const MAX_NEW_SIZE: usize = 2 * 1024 * 1024 * 1024; // 2 GB
11
/// Per-stream compression scheme used by BSDF2 patches.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionAlgorithm {
    None = 0,
    Bz2 = 1,
    Brotli = 2,
}

impl CompressionAlgorithm {
    /// Decode the one-byte algorithm tag stored after the BSDF2 magic.
    ///
    /// Returns `InvalidData` for any tag outside 0..=2.
    fn from_u8(value: u8) -> io::Result<Self> {
        let alg = match value {
            0 => Self::None,
            1 => Self::Bz2,
            2 => Self::Brotli,
            other => {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!("Unknown compression algorithm: {}", other),
                ))
            }
        };
        Ok(alg)
    }
}
32
/// Decode an 8-byte sign-magnitude integer as used by bspatch.
///
/// The most significant bit is a sign flag and the remaining 63 bits
/// are the magnitude; this is NOT plain two's-complement little-endian.
#[inline]
fn offtin(buf: [u8; 8]) -> i64 {
    let raw = i64::from_le_bytes(buf);
    let magnitude = raw & !(1 << 63);
    if raw & (1 << 63) == 0 {
        magnitude
    } else {
        -magnitude
    }
}
44
45/// Decompress data based on algorithm
46fn decompress(alg: CompressionAlgorithm, data: &[u8]) -> io::Result<Vec<u8>> {
47    match alg {
48        CompressionAlgorithm::None => Ok(data.to_vec()),
49        CompressionAlgorithm::Bz2 => {
50            let mut decompressed = Vec::new();
51            let mut decoder = bzip2::read::BzDecoder::new(data);
52            decoder.read_to_end(&mut decompressed)?;
53            Ok(decompressed)
54        }
55        CompressionAlgorithm::Brotli => {
56            let mut decompressed = Vec::new();
57            let mut decoder = brotli::Decompressor::new(data, 4096);
58            decoder.read_to_end(&mut decompressed)?;
59            Ok(decompressed)
60        }
61    }
62}
63
64/// Parse BSDF2 or classic BSDIFF patch header and return streams
65pub fn parse_bsdf2_header(
66    patch_data: &[u8],
67) -> io::Result<(i64, Vec<u8>, Vec<u8>, Vec<u8>)> {
68    if patch_data.len() < 32 {
69        return Err(io::Error::new(
70            io::ErrorKind::InvalidData,
71            "Patch data too short",
72        ));
73    }
74
75    let magic = &patch_data[0..8];
76
77    // Determine format and compression algorithms
78    let (alg_control, alg_diff, alg_extra) = if magic == BSDIFF_MAGIC {
79        // Classic BSDIFF format - uses BZ2 for all streams
80        (
81            CompressionAlgorithm::Bz2,
82            CompressionAlgorithm::Bz2,
83            CompressionAlgorithm::Bz2,
84        )
85    } else if &magic[0..5] == BSDF2_MAGIC {
86        // BSDF2 format - per-stream compression
87        (
88            CompressionAlgorithm::from_u8(magic[5])?,
89            CompressionAlgorithm::from_u8(magic[6])?,
90            CompressionAlgorithm::from_u8(magic[7])?,
91        )
92    } else {
93        return Err(io::Error::new(
94            io::ErrorKind::InvalidData,
95            "Invalid BSDIFF/BSDF2 magic header",
96        ));
97    };
98
99    // Read length headers using bspatch integer encoding
100    let len_control = offtin(patch_data[8..16].try_into().unwrap());
101    let len_diff = offtin(patch_data[16..24].try_into().unwrap());
102    let new_size = offtin(patch_data[24..32].try_into().unwrap());
103
104    // Validate lengths before allocation
105    if len_control < 0 || len_diff < 0 || new_size < 0 {
106        return Err(io::Error::new(
107            io::ErrorKind::InvalidData,
108            "Negative length in patch header",
109        ));
110    }
111
112    let len_control = len_control as usize;
113    let len_diff = len_diff as usize;
114    let new_size_usize = new_size as usize;
115
116    // Safety checks before allocation
117    if new_size_usize > MAX_NEW_SIZE {
118        return Err(io::Error::new(
119            io::ErrorKind::InvalidData,
120            format!("New size {} exceeds limit", new_size_usize),
121        ));
122    }
123
124    
125    
126    let pos: usize = 32;
127
128
129    // Validate lengths don't exceed patch bounds
130    if pos.checked_add(len_control)
131        .and_then(|p| p.checked_add(len_diff))
132        .map_or(true, |total| total > patch_data.len())
133    {
134        return Err(io::Error::new(
135            io::ErrorKind::InvalidData,
136            "Stream lengths exceed patch size",
137        ));
138    }
139
140    // Read and decompress control stream
141    let control_end = pos + len_control;
142    if control_end > patch_data.len() {
143        return Err(io::Error::new(
144            io::ErrorKind::InvalidData,
145            "Control stream exceeds patch bounds",
146        ));
147    }
148    let control_compressed = &patch_data[pos..control_end];
149    let control_data = decompress(alg_control, control_compressed)?;
150
151    // Validate control data is properly aligned (24 bytes per tuple)
152    if control_data.len() % 24 != 0 {
153        return Err(io::Error::new(
154            io::ErrorKind::InvalidData,
155            "Invalid control data length (not multiple of 24)",
156        ));
157    }
158
159    // Read and decompress diff stream
160    let diff_start = control_end;
161    let diff_end = diff_start + len_diff;
162    if diff_end > patch_data.len() {
163        return Err(io::Error::new(
164            io::ErrorKind::InvalidData,
165            "Diff stream exceeds patch bounds",
166        ));
167    }
168    let diff_compressed = &patch_data[diff_start..diff_end];
169    let diff_data = decompress(alg_diff, diff_compressed)?;
170
171    // Read and decompress extra stream (rest of data)
172    let extra_compressed = &patch_data[diff_end..];
173    let extra_data = decompress(alg_extra, extra_compressed)?;
174
175    Ok((new_size, control_data, diff_data, extra_data))
176}
177
/// Apply a BSDF2 (or classic BSDIFF40) patch to `old`, writing the
/// rebuilt output into `new`.
///
/// `new` is cleared first; on success it contains exactly the number
/// of bytes declared in the patch header.
///
/// # Errors
///
/// Returns `InvalidData` if the header is malformed, a control tuple
/// is negative or would overrun a stream, a seek moves the old-file
/// cursor below zero, the final output size does not match the header,
/// or the diff/extra streams are not fully consumed.
pub fn patch_bsdf2(old: &[u8], patch_data: &[u8], new: &mut Vec<u8>) -> io::Result<()> {
    // Parse header and decompress streams
    let (new_size, control_data, diff_data, extra_data) = parse_bsdf2_header(patch_data)?;

    // Header parsing already validated new_size >= 0 and <= MAX_NEW_SIZE.
    let new_size = new_size as usize;

    // Pre-allocate output buffer
    new.clear();
    new.reserve(new_size);

    let mut oldpos: usize = 0; // read cursor into `old`
    let mut diff_pos: usize = 0; // read cursor into the diff stream
    let mut extra_pos: usize = 0; // read cursor into the extra stream

    // Process control tuples: 24 bytes each, holding three
    // sign-magnitude i64 values (add_len, copy_len, seek_amount).
    let mut ctrl_idx = 0;
    while ctrl_idx < control_data.len() {
        // Defensive re-check; parse_bsdf2_header already enforced that
        // the control stream is a whole multiple of 24 bytes.
        if ctrl_idx + 24 > control_data.len() {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "Incomplete control tuple",
            ));
        }

        // Read control tuple using bspatch integer encoding
        let add_len = offtin(control_data[ctrl_idx..ctrl_idx + 8].try_into().unwrap());
        let copy_len = offtin(control_data[ctrl_idx + 8..ctrl_idx + 16].try_into().unwrap());
        let seek_amount = offtin(control_data[ctrl_idx + 16..ctrl_idx + 24].try_into().unwrap());
        ctrl_idx += 24;

        // Reject negative lengths before casting to usize.
        if add_len < 0 || copy_len < 0 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!("Negative length in control tuple: add={}, copy={}", add_len, copy_len),
            ));
        }

        let add_len = add_len as usize;
        let copy_len = copy_len as usize;

        // Check we won't exceed output size; checked_add also guards
        // against usize overflow from hostile tuple values.
        if new.len().checked_add(add_len)
            .and_then(|n| n.checked_add(copy_len))
            .map_or(true, |total| total > new_size)
        {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "Control tuple would exceed new_size",
            ));
        }

        // ADD operation: new[newpos..newpos+add] = old[oldpos..] + diff[diff_pos..]
        if add_len > 0 {
            // Check diff_data bounds
            if diff_pos.checked_add(add_len).map_or(true, |end| end > diff_data.len()) {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    "Diff data exhausted",
                ));
            }

            // Reserve the output range up front, then fill it in place.
            let new_start = new.len();
            new.resize(new_start + add_len, 0);

            // Old bytes read past the end of `old` are treated as zero,
            // so the diff byte passes through unchanged (mirrors upstream
            // bspatch, which skips out-of-range old bytes).
            for i in 0..add_len {
                let old_byte = old.get(oldpos + i).copied().unwrap_or(0);
                let diff_byte = diff_data[diff_pos + i];
                new[new_start + i] = old_byte.wrapping_add(diff_byte);
            }

            oldpos = oldpos.saturating_add(add_len);
            diff_pos += add_len;
        }

        // COPY operation: new[newpos..newpos+copy] = extra[extra_pos..]
        if copy_len > 0 {
            // Check extra_data bounds
            if extra_pos.checked_add(copy_len).map_or(true, |end| end > extra_data.len()) {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    "Extra data exhausted",
                ));
            }

            new.extend_from_slice(&extra_data[extra_pos..extra_pos + copy_len]);
            extra_pos += copy_len;
        }

        // SEEK operation: adjust oldpos by a signed amount. Performed in
        // i64 so the move can neither overflow nor underflow silently.
        let new_oldpos = (oldpos as i64)
            .checked_add(seek_amount)
            .ok_or_else(|| {
                io::Error::new(io::ErrorKind::InvalidData, "Seek overflow")
            })?;

        if new_oldpos < 0 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!("Seek underflow: oldpos={}, seek={}", oldpos, seek_amount),
            ));
        }

        oldpos = new_oldpos as usize;
    }

    // Validate final state: output must be exactly the declared size.
    if new.len() != new_size {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("Final size mismatch: expected {}, got {}", new_size, new.len()),
        ));
    }

    // Validate all streams were fully consumed; a well-formed patch
    // uses every diff and extra byte.
    if diff_pos != diff_data.len() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("Diff data not fully consumed: used {}/{}", diff_pos, diff_data.len()),
        ));
    }

    if extra_pos != extra_data.len() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            format!("Extra data not fully consumed: used {}/{}", extra_pos, extra_data.len()),
        ));
    }

    Ok(())
}
313
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_offtin_positive() {
        let buf = [0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
        assert_eq!(offtin(buf), 0x42);
    }

    #[test]
    fn test_offtin_negative() {
        // Sign bit set: 0x8000000000000042 represents -66
        let buf = [0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80];
        assert_eq!(offtin(buf), -0x42);
    }

    #[test]
    fn test_compression_algorithm_from_u8() {
        assert_eq!(CompressionAlgorithm::from_u8(0).unwrap(), CompressionAlgorithm::None);
        assert_eq!(CompressionAlgorithm::from_u8(1).unwrap(), CompressionAlgorithm::Bz2);
        assert_eq!(CompressionAlgorithm::from_u8(2).unwrap(), CompressionAlgorithm::Brotli);
        assert!(CompressionAlgorithm::from_u8(3).is_err());
    }

    #[test]
    fn test_parse_invalid_magic() {
        let invalid = vec![0u8; 32];
        assert!(parse_bsdf2_header(&invalid).is_err());
    }

    #[test]
    fn test_parse_negative_lengths() {
        let mut data = vec![0u8; 32];
        data[0..8].copy_from_slice(BSDIFF_MAGIC);
        // Set negative length (sign bit set)
        data[8] = 0x01;
        data[15] = 0x80; // Sign bit

        assert!(parse_bsdf2_header(&data).is_err());
    }

    // Positive-path coverage: build a minimal uncompressed BSDF2 patch
    // by hand and verify patch_bsdf2 reconstructs the expected output.
    #[test]
    fn test_patch_roundtrip_uncompressed() {
        let old = [10u8, 20, 30, 40];
        let expected = [11u8, 22, 33, 44, 7, 8, 9];

        // One control tuple: add 4 diff bytes, copy 3 extra bytes, no seek.
        // Positive values encode identically in sign-magnitude and LE i64.
        let mut control = Vec::new();
        control.extend_from_slice(&4i64.to_le_bytes());
        control.extend_from_slice(&3i64.to_le_bytes());
        control.extend_from_slice(&0i64.to_le_bytes());

        let diff = [1u8, 2, 3, 4];
        let extra = [7u8, 8, 9];

        let mut patch = Vec::new();
        patch.extend_from_slice(BSDF2_MAGIC);
        patch.extend_from_slice(&[0, 0, 0]); // all three streams uncompressed
        patch.extend_from_slice(&(control.len() as i64).to_le_bytes());
        patch.extend_from_slice(&(diff.len() as i64).to_le_bytes());
        patch.extend_from_slice(&(expected.len() as i64).to_le_bytes());
        patch.extend_from_slice(&control);
        patch.extend_from_slice(&diff);
        patch.extend_from_slice(&extra);

        let mut new = Vec::new();
        patch_bsdf2(&old, &patch, &mut new).unwrap();
        assert_eq!(new, expected);
    }
}