lzma_rust2/filter/
bcj2.rs

1//! The BCJ2 filter is a branch converter for 32-bit x86 executables (version 2).
2
3mod decode;
4
5use alloc::{vec, vec::Vec};
6
7use decode::Bcj2Decoder;
8
9use crate::{error_invalid_data, Read};
10
/// Capacity in bytes of the buffer region reserved for each input stream (256 KiB).
const BUF_SIZE: usize = 256 * 1024;

/// Number of input streams. A decoder state below this value is used by the reader as the
/// index of the stream to refill.
const BCJ2_NUM_STREAMS: usize = 4;

/// Index of the main stream.
const BCJ2_STREAM_MAIN: usize = 0;
/// Index of the CALL stream; the reader treats it as a 32-bit stream.
const BCJ2_STREAM_CALL: usize = 1;
/// Index of the JUMP stream; the reader treats it as a 32-bit stream.
const BCJ2_STREAM_JUMP: usize = 2;
/// Index of the RC stream (presumably the range-coder stream; confirm against `decode`).
const BCJ2_STREAM_RC: usize = 3;

// Decoder states at or above `BCJ2_NUM_STREAMS` do not name an input stream: the reader stops
// refilling when it sees one. Their individual meaning is defined by the `decode` submodule.
const BCJ2_DEC_STATE_ORIG_0: usize = BCJ2_NUM_STREAMS;
const BCJ2_DEC_STATE_ORIG_3: usize = BCJ2_DEC_STATE_ORIG_0 + 3;
const BCJ2_DEC_STATE_ORIG: usize = BCJ2_DEC_STATE_ORIG_0 + 4;
const BCJ2_DEC_STATE_OK: usize = BCJ2_DEC_STATE_ORIG_0 + 5;

// The constants below are not referenced in this file.
// NOTE(review): presumably consumed by the `decode` submodule's probability model — confirm.
const NUM_MODEL_BITS: u16 = 11;
const BIT_MODEL_TOTAL: u16 = 1 << NUM_MODEL_BITS;
const NUM_MOVE_BITS: u16 = 5;
const K_TOP_VALUE: u32 = 0x0100_0000;
38
39#[inline(always)]
40const fn bcj2_is_32bit_stream(s: usize) -> bool {
41    (s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP
42}
43
44/// BCJ2 coder for x86 executables with separate streams for different instruction types.
45pub struct Bcj2Coder {
46    bufs: Vec<u8>,
47}
48
49impl Bcj2Coder {
50    fn buf_at(&mut self, i: usize) -> &mut [u8] {
51        let i = i * BUF_SIZE;
52        &mut self.bufs[i..i + BUF_SIZE]
53    }
54}
55
56impl Default for Bcj2Coder {
57    fn default() -> Self {
58        let buf_len = BUF_SIZE * (BCJ2_NUM_STREAMS);
59        Self {
60            bufs: vec![0; buf_len],
61        }
62    }
63}
64
65/// Reader for BCJ2-filtered data with multiple input streams.
66pub struct Bcj2Reader<R> {
67    base: Bcj2Coder,
68    inputs: Vec<R>,
69    decoder: Bcj2Decoder,
70    extra_read_sizes: [usize; BCJ2_NUM_STREAMS],
71    read_res: [bool; BCJ2_NUM_STREAMS],
72    uncompressed_size: u64,
73}
74
75impl<R> Bcj2Reader<R> {
76    /// Creates a new BCJ2 reader with the given input streams and expected output size.
77    pub fn new(inputs: Vec<R>, uncompressed_size: u64) -> Self {
78        Self {
79            base: Default::default(),
80            inputs,
81            decoder: Bcj2Decoder::new(),
82            extra_read_sizes: [0; BCJ2_NUM_STREAMS],
83            read_res: [true; BCJ2_NUM_STREAMS],
84            uncompressed_size,
85        }
86        .init()
87    }
88
89    fn init(mut self) -> Self {
90        let mut v = 0;
91        for i in 0..BCJ2_NUM_STREAMS {
92            self.decoder.bufs[i] = v;
93            self.decoder.lims[i] = v;
94            v += BUF_SIZE;
95        }
96
97        self
98    }
99}
100
101impl<R: Read> Read for Bcj2Reader<R> {
102    fn read(&mut self, buf: &mut [u8]) -> crate::Result<usize> {
103        let mut dest_buf = buf;
104        if dest_buf.len() > self.uncompressed_size as usize {
105            dest_buf = &mut dest_buf[..self.uncompressed_size as usize];
106        }
107        if dest_buf.is_empty() {
108            return Ok(0);
109        }
110        let mut result_size = 0;
111        self.decoder.set_dest(0);
112        let mut offset = 0;
113        loop {
114            if !self.decoder.decode(&mut self.base.bufs, dest_buf) {
115                return Err(error_invalid_data("bcj2 decode error"));
116            }
117
118            {
119                let cur_size = self.decoder.dest() - offset;
120                if cur_size != 0 {
121                    result_size += cur_size;
122                    self.uncompressed_size -= cur_size as u64;
123                    offset += cur_size;
124                }
125            }
126
127            if self.decoder.state >= BCJ2_NUM_STREAMS {
128                break;
129            }
130            let mut total_read = self.extra_read_sizes[self.decoder.state];
131            {
132                let buf_index = self.decoder.state * BUF_SIZE;
133                let from = self.decoder.bufs[self.decoder.state];
134                for i in 0..total_read {
135                    let b = self.base.bufs[from + i];
136                    self.base.bufs[buf_index + i] = b;
137                }
138                self.decoder.lims[self.decoder.state] = buf_index;
139                self.decoder.bufs[self.decoder.state] = buf_index;
140            }
141            if !self.read_res[self.decoder.state] {
142                return Err(error_invalid_data("bcj2 decode error:2"));
143            }
144
145            loop {
146                let cur_size = BUF_SIZE - total_read;
147                let cur_size = self.inputs[self.decoder.state].read(
148                    &mut self.base.buf_at(self.decoder.state)[total_read..total_read + cur_size],
149                )?;
150                if cur_size == 0 {
151                    break;
152                }
153                total_read += cur_size;
154                if !(total_read < 4 && bcj2_is_32bit_stream(self.decoder.state)) {
155                    break;
156                }
157            }
158
159            if total_read == 0 {
160                break;
161            }
162
163            if bcj2_is_32bit_stream(self.decoder.state) {
164                let extra_size = total_read & 3;
165                self.extra_read_sizes[self.decoder.state] = extra_size;
166                if total_read < 4 {
167                    if result_size != 0 {
168                        return Ok(result_size);
169                    }
170                    return Err(error_invalid_data("bcj2 decode error:3"));
171                }
172                total_read -= extra_size;
173            }
174            self.decoder.lims[self.decoder.state] = total_read + self.decoder.state * BUF_SIZE;
175        }
176
177        if self.uncompressed_size == 0 {
178            if self.decoder.code != 0 {
179                return Err(error_invalid_data("bcj2 decode error:4"));
180            }
181            if self.decoder.state != BCJ2_STREAM_MAIN && self.decoder.state != BCJ2_DEC_STATE_ORIG {
182                return Err(error_invalid_data("bcj2 decode error:5"));
183            }
184        }
185        Ok(result_size)
186    }
187}