// lzma_rust2/filter/bcj.rs

1//! Branch/Call/Jump Filters for executables of different architectures.
2
3mod arm;
4mod ia64;
5mod ppc;
6mod riscv;
7mod sparc;
8mod x86;
9
10use alloc::{vec, vec::Vec};
11
12use crate::Read;
13#[cfg(feature = "encoder")]
14use crate::Write;
15
/// Signature shared by the architecture-specific conversion routines.
///
/// A routine receives the filter state and a mutable byte window and returns a
/// byte count. The stream adapters in this file treat that count as the length
/// of the prefix of the window that is finished and may be passed on.
type FilterFn = fn(filter: &mut BcjFilter, buf: &mut [u8]) -> usize;

/// State handed to a BCJ conversion routine on every call.
struct BcjFilter {
    // NOTE(review): readers construct filters with `false` and writers with
    // `true` as the second constructor argument; presumably stored here —
    // confirm in the architecture submodules.
    is_encoder: bool,
    // NOTE(review): presumably the running stream position seeded from the
    // constructor's `start_pos` — confirm in the architecture submodules.
    pos: usize,
    // NOTE(review): scratch value kept between calls; looks specific to one
    // architecture's routine — confirm.
    prev_mask: u32,
    /// Conversion routine invoked by [`BcjFilter::code`].
    filter: FilterFn,
}

impl BcjFilter {
    /// Runs the stored conversion routine over `buf` and returns its result.
    #[inline]
    fn code(&mut self, buf: &mut [u8]) -> usize {
        // Copy the function pointer out first so that `self` can be handed to
        // the routine as a whole.
        let routine: FilterFn = self.filter;
        routine(self, buf)
    }
}
32
33const FILTER_BUF_SIZE: usize = 4096;
34
35/// Reader that applies BCJ (Branch/Call/Jump) filtering to compressed data.
36pub struct BcjReader<R> {
37    inner: R,
38    filter: BcjFilter,
39    state: State,
40}
41
/// Bookkeeping for the reader's working buffer.
///
/// The buffer is laid out as `[consumed | filtered | unfiltered | free]`, where
/// the filtered region starts at `pos`.
#[derive(Default, Debug)]
struct State {
    /// Working buffer. `Default` leaves it empty; the reader's constructor
    /// sizes it to `FILTER_BUF_SIZE`.
    filter_buf: Vec<u8>,
    /// Index in `filter_buf` of the first filtered byte not yet handed out.
    pos: usize,
    /// Number of bytes starting at `pos` that are ready to be copied out.
    filtered: usize,
    /// Number of bytes after the filtered region that have been read but not
    /// yet accepted by the filter.
    unfiltered: usize,
    /// Set once the inner reader has returned zero bytes.
    end_reached: bool,
}
50
51impl<R> BcjReader<R> {
52    fn new(inner: R, filter: BcjFilter) -> Self {
53        Self {
54            inner,
55            filter,
56            state: State {
57                filter_buf: vec![0; FILTER_BUF_SIZE],
58                ..Default::default()
59            },
60        }
61    }
62
63    /// Unwraps the reader, returning the underlying reader.
64    pub fn into_inner(self) -> R {
65        self.inner
66    }
67
68    /// Returns a reference to the inner reader.
69    pub fn inner(&self) -> &R {
70        &self.inner
71    }
72
73    /// Returns a mutable reference to the inner reader.
74    pub fn inner_mut(&mut self) -> &mut R {
75        &mut self.inner
76    }
77
78    /// Creates a new BCJ reader for x86 instruction filtering.
79    #[inline]
80    pub fn new_x86(inner: R, start_pos: usize) -> Self {
81        Self::new(inner, BcjFilter::new_x86(start_pos, false))
82    }
83
84    /// Creates a new BCJ reader for ARM instruction filtering.
85    #[inline]
86    pub fn new_arm(inner: R, start_pos: usize) -> Self {
87        Self::new(inner, BcjFilter::new_arm(start_pos, false))
88    }
89
90    /// Creates a new BCJ reader for ARM64 instruction filtering.
91    #[inline]
92    pub fn new_arm64(inner: R, start_pos: usize) -> Self {
93        Self::new(inner, BcjFilter::new_arm64(start_pos, false))
94    }
95
96    /// Creates a new BCJ reader for ARM Thumb instruction filtering.
97    #[inline]
98    pub fn new_arm_thumb(inner: R, start_pos: usize) -> Self {
99        Self::new(inner, BcjFilter::new_arm_thumb(start_pos, false))
100    }
101
102    /// Creates a new BCJ reader for PowerPC instruction filtering.
103    #[inline]
104    pub fn new_ppc(inner: R, start_pos: usize) -> Self {
105        Self::new(inner, BcjFilter::new_power_pc(start_pos, false))
106    }
107
108    /// Creates a new BCJ reader for SPARC instruction filtering.
109    #[inline]
110    pub fn new_sparc(inner: R, start_pos: usize) -> Self {
111        Self::new(inner, BcjFilter::new_sparc(start_pos, false))
112    }
113
114    /// Creates a new BCJ reader for IA-64 instruction filtering.
115    #[inline]
116    pub fn new_ia64(inner: R, start_pos: usize) -> Self {
117        Self::new(inner, BcjFilter::new_ia64(start_pos, false))
118    }
119
120    /// Creates a new BCJ reader for RISC-V instruction filtering.
121    #[inline]
122    pub fn new_riscv(inner: R, start_pos: usize) -> Self {
123        Self::new(inner, BcjFilter::new_riscv(start_pos, false))
124    }
125}
126
127impl<R: Read> Read for BcjReader<R> {
128    fn read(&mut self, buf: &mut [u8]) -> crate::Result<usize> {
129        if buf.is_empty() {
130            return Ok(0);
131        }
132
133        let mut len = buf.len();
134        let mut state = core::mem::take(&mut self.state);
135        let mut off = 0;
136        let mut size = 0;
137
138        loop {
139            // Copy filtered data into the caller-provided buffer.
140            if state.filtered > 0 {
141                let copy_size = state.filtered.min(len);
142                let pos = state.pos;
143                buf[off..(off + copy_size)]
144                    .copy_from_slice(&state.filter_buf[pos..(pos + copy_size)]);
145                state.pos += copy_size;
146                state.filtered -= copy_size;
147                off += copy_size;
148                len -= copy_size;
149                size += copy_size;
150            }
151
152            // If end of filterBuf was reached, move the pending data to
153            // the beginning of the buffer so that more data can be
154            // copied into filterBuf on the next loop iteration.
155            if state.pos + state.filtered + state.unfiltered == FILTER_BUF_SIZE {
156                // state.filter_buf.copy_from_slice(src);
157                state.filter_buf.rotate_left(state.pos);
158                state.pos = 0;
159            }
160
161            if len == 0 || state.end_reached {
162                self.state = state;
163                return Ok(if size > 0 { size } else { 0 });
164            }
165
166            assert_eq!(state.filtered, 0);
167            // Get more data into the temporary buffer.
168            let mut in_size = FILTER_BUF_SIZE - (state.pos + state.filtered + state.unfiltered);
169            let start = state.pos + state.filtered + state.unfiltered;
170            let temp = &mut state.filter_buf[start..(start + in_size)];
171            in_size = match self.inner.read(temp) {
172                Ok(s) => s,
173                Err(error) => {
174                    self.state = state;
175                    return Err(error);
176                }
177            };
178
179            if in_size == 0 {
180                // Mark the remaining unfiltered bytes to be ready
181                // to be copied out.
182                state.end_reached = true;
183                state.filtered = state.unfiltered;
184                state.unfiltered = 0;
185            } else {
186                // Filter the data in filterBuf.
187                state.unfiltered += in_size;
188                state.filtered = self
189                    .filter
190                    .code(&mut state.filter_buf[state.pos..(state.pos + state.unfiltered)]);
191                assert!(state.filtered <= state.unfiltered);
192                state.unfiltered -= state.filtered;
193            }
194        }
195    }
196}
197
/// A writer adapter that runs a BCJ (Branch/Call/Jump) filter over the data
/// written to it and forwards the result to the wrapped writer.
#[cfg(feature = "encoder")]
pub struct BcjWriter<W> {
    /// Destination for the filtered bytes.
    inner: W,
    /// Filter state together with the conversion routine.
    filter: BcjFilter,
    /// Bytes received from the caller that have not been forwarded yet.
    buffer: Vec<u8>,
}
205
// Every `new_*` constructor below forwards `start_pos` to the matching
// `BcjFilter` constructor and passes `true` as its second argument (the
// reader constructors pass `false`).
#[cfg(feature = "encoder")]
impl<W> BcjWriter<W> {
    /// Wraps `inner` with the given filter and an empty pending buffer that
    /// has `FILTER_BUF_SIZE` bytes of capacity reserved.
    fn new(inner: W, filter: BcjFilter) -> Self {
        let buffer = Vec::with_capacity(FILTER_BUF_SIZE);
        Self {
            inner,
            filter,
            buffer,
        }
    }

    /// Consumes the adapter and hands back the wrapped writer.
    ///
    /// Pending bytes are not written; use [`BcjWriter::finish`] for that.
    pub fn into_inner(self) -> W {
        self.inner
    }

    /// Borrows the wrapped writer.
    pub fn inner(&self) -> &W {
        &self.inner
    }

    /// Mutably borrows the wrapped writer.
    pub fn inner_mut(&mut self) -> &mut W {
        &mut self.inner
    }

    /// Builds a writer that applies the x86 BCJ filter before writing to `inner`.
    #[inline]
    pub fn new_x86(inner: W, start_pos: usize) -> Self {
        let filter = BcjFilter::new_x86(start_pos, true);
        Self::new(inner, filter)
    }

    /// Builds a writer that applies the ARM BCJ filter before writing to `inner`.
    #[inline]
    pub fn new_arm(inner: W, start_pos: usize) -> Self {
        let filter = BcjFilter::new_arm(start_pos, true);
        Self::new(inner, filter)
    }

    /// Builds a writer that applies the ARM64 BCJ filter before writing to `inner`.
    #[inline]
    pub fn new_arm64(inner: W, start_pos: usize) -> Self {
        let filter = BcjFilter::new_arm64(start_pos, true);
        Self::new(inner, filter)
    }

    /// Builds a writer that applies the ARM Thumb BCJ filter before writing to `inner`.
    #[inline]
    pub fn new_arm_thumb(inner: W, start_pos: usize) -> Self {
        let filter = BcjFilter::new_arm_thumb(start_pos, true);
        Self::new(inner, filter)
    }

    /// Builds a writer that applies the PowerPC BCJ filter before writing to `inner`.
    #[inline]
    pub fn new_ppc(inner: W, start_pos: usize) -> Self {
        let filter = BcjFilter::new_power_pc(start_pos, true);
        Self::new(inner, filter)
    }

    /// Builds a writer that applies the SPARC BCJ filter before writing to `inner`.
    #[inline]
    pub fn new_sparc(inner: W, start_pos: usize) -> Self {
        let filter = BcjFilter::new_sparc(start_pos, true);
        Self::new(inner, filter)
    }

    /// Builds a writer that applies the IA-64 BCJ filter before writing to `inner`.
    #[inline]
    pub fn new_ia64(inner: W, start_pos: usize) -> Self {
        let filter = BcjFilter::new_ia64(start_pos, true);
        Self::new(inner, filter)
    }

    /// Builds a writer that applies the RISC-V BCJ filter before writing to `inner`.
    #[inline]
    pub fn new_riscv(inner: W, start_pos: usize) -> Self {
        let filter = BcjFilter::new_riscv(start_pos, true);
        Self::new(inner, filter)
    }

    /// Completes the stream and returns the wrapped writer.
    ///
    /// Bytes still held in the pending buffer are written out as they are,
    /// then the wrapped writer is flushed. Call this once no more data will
    /// be written.
    ///
    /// # Errors
    ///
    /// Returns the error from the wrapped writer if the final write or the
    /// flush fails.
    pub fn finish(self) -> crate::Result<W>
    where
        W: Write,
    {
        let Self {
            mut inner, buffer, ..
        } = self;

        // Whatever the filter never accepted goes out unchanged.
        if !buffer.is_empty() {
            inner.write_all(&buffer)?;
        }
        inner.flush()?;
        Ok(inner)
    }
}
294
#[cfg(feature = "encoder")]
impl<W: Write> Write for BcjWriter<W> {
    /// Accepts all of `buf`, forwards the part the filter has finished with,
    /// and keeps the remainder pending for the next call.
    ///
    /// Always reports `buf.len()` bytes as written on success.
    ///
    /// # Errors
    ///
    /// Returns the error from the wrapped writer if forwarding fails.
    fn write(&mut self, buf: &[u8]) -> crate::Result<usize> {
        self.buffer.extend_from_slice(buf);

        // The filter reports how long a prefix of the pending bytes is done.
        let done = self.filter.code(&mut self.buffer);
        if done != 0 {
            self.inner.write_all(&self.buffer[..done])?;
        }

        // Drop the forwarded prefix; the tail moves to the front and waits
        // for more input.
        self.buffer.drain(..done);

        Ok(buf.len())
    }

    /// Flushes the wrapped writer.
    ///
    /// Pending bytes held back by the filter are not written here; they are
    /// emitted by later writes or by `finish`.
    fn flush(&mut self) -> crate::Result<()> {
        self.inner.flush()
    }
}
322
#[cfg(all(test, feature = "encoder", feature = "std"))]
mod tests {
    use std::io::{copy, Cursor};

    use super::*;

    /// Generates one round-trip test: encode a fixture with
    /// `BcjWriter::$ctor`, check that the bytes changed, decode with
    /// `BcjReader::$ctor`, and check that the original bytes come back.
    macro_rules! bcj_roundtrip_test {
        ($test_name:ident, $ctor:ident, $fixture:literal) => {
            #[test]
            fn $test_name() {
                let original = std::fs::read($fixture).unwrap();

                let mut encoded = Vec::new();
                let mut writer = BcjWriter::$ctor(Cursor::new(&mut encoded), 0);
                copy(&mut original.as_slice(), &mut writer).expect("Failed to encode data");
                writer.finish().expect("Failed to finish encoding");

                assert!(original != encoded);

                let mut decoded = Vec::new();
                let mut reader = BcjReader::$ctor(Cursor::new(&encoded), 0);
                copy(&mut reader, &mut decoded).expect("Failed to decode data");

                assert!(original == decoded);
            }
        };
    }

    bcj_roundtrip_test!(test_bcj_x86_roundtrip, new_x86, "tests/data/wget-x86");
    bcj_roundtrip_test!(test_bcj_arm_roundtrip, new_arm, "tests/data/wget-arm");
    bcj_roundtrip_test!(test_bcj_arm64_roundtrip, new_arm64, "tests/data/wget-arm64");
    bcj_roundtrip_test!(
        test_bcj_arm_thumb_roundtrip,
        new_arm_thumb,
        "tests/data/wget-arm-thumb"
    );
    bcj_roundtrip_test!(test_bcj_ppc_roundtrip, new_ppc, "tests/data/wget-ppc");
    bcj_roundtrip_test!(test_bcj_sparc_roundtrip, new_sparc, "tests/data/wget-sparc");
    bcj_roundtrip_test!(test_bcj_ia64_roundtrip, new_ia64, "tests/data/wget-ia64");
    bcj_roundtrip_test!(test_bcj_riscv_roundtrip, new_riscv, "tests/data/wget-riscv");
}