1use crate::bitio::direction::left::Left;
9use crate::bitio::reader::{BitRead, BitReader};
10use crate::bitset::BitArray;
11use crate::bzip2::error::BZip2Error;
12use crate::bzip2::mtf::MtfPositionDecoder;
13use crate::bzip2::{HEADER_h, BZ_G_SIZE, HEADER_0, HEADER_B, HEADER_Z};
14use crate::core::hash::{BuildHasher, Hasher};
15use crate::crc32::{BuiltinDigest, IEEE_NORMAL};
16use crate::huffman::decoder::HuffmanDecoder;
17use crate::traits::decoder::{BitDecodeService, BitDecoderImpl, Decoder};
18#[cfg(not(feature = "std"))]
19use alloc::string::String;
20#[cfg(not(feature = "std"))]
21#[allow(unused_imports)]
22use alloc::vec;
23#[cfg(not(feature = "std"))]
24use alloc::vec::Vec;
25use log::debug;
26
27const BZ2_R_NUMS: [usize; 512] = [
28 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 985, 724, 205, 454, 863,
29 491, 741, 242, 949, 214, 733, 859, 335, 708, 621, 574, 73, 654, 730, 472,
30 419, 436, 278, 496, 867, 210, 399, 680, 480, 51, 878, 465, 811, 169, 869,
31 675, 611, 697, 867, 561, 862, 687, 507, 283, 482, 129, 807, 591, 733, 623,
32 150, 238, 59, 379, 684, 877, 625, 169, 643, 105, 170, 607, 520, 932, 727,
33 476, 693, 425, 174, 647, 73, 122, 335, 530, 442, 853, 695, 249, 445, 515,
34 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, 641, 801, 220, 162, 819,
35 984, 589, 513, 495, 799, 161, 604, 958, 533, 221, 400, 386, 867, 600, 782,
36 382, 596, 414, 171, 516, 375, 682, 485, 911, 276, 98, 553, 163, 354, 666,
37 933, 424, 341, 533, 870, 227, 730, 475, 186, 263, 647, 537, 686, 600, 224,
38 469, 68, 770, 919, 190, 373, 294, 822, 808, 206, 184, 943, 795, 384, 383,
39 461, 404, 758, 839, 887, 715, 67, 618, 276, 204, 918, 873, 777, 604, 560,
40 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, 652, 934, 970, 447, 318,
41 353, 859, 672, 112, 785, 645, 863, 803, 350, 139, 93, 354, 99, 820, 908,
42 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, 653, 282, 762, 623, 680,
43 81, 927, 626, 789, 125, 411, 521, 938, 300, 821, 78, 343, 175, 128, 250,
44 170, 774, 972, 275, 999, 639, 495, 78, 352, 126, 857, 956, 358, 619, 580,
45 124, 737, 594, 701, 612, 669, 112, 134, 694, 363, 992, 809, 743, 168, 974,
46 944, 375, 748, 52, 600, 747, 642, 182, 862, 81, 344, 805, 988, 739, 511,
47 655, 814, 334, 249, 515, 897, 955, 664, 981, 649, 113, 974, 459, 893, 228,
48 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, 686, 754, 806, 760, 493,
49 403, 415, 394, 687, 700, 946, 670, 656, 610, 738, 392, 760, 799, 887, 653,
50 978, 321, 576, 617, 626, 502, 894, 679, 243, 440, 680, 879, 194, 572, 640,
51 724, 926, 56, 204, 700, 707, 151, 457, 449, 797, 195, 791, 558, 945, 679,
52 297, 59, 87, 824, 713, 663, 412, 693, 342, 606, 134, 108, 571, 364, 631,
53 212, 174, 643, 304, 329, 343, 97, 430, 751, 497, 314, 983, 374, 822, 928,
54 140, 206, 73, 263, 980, 736, 876, 478, 430, 305, 170, 514, 364, 692, 829,
55 82, 855, 953, 676, 246, 369, 970, 294, 750, 807, 827, 150, 790, 288, 923,
56 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, 896, 831, 547, 261, 524,
57 462, 293, 465, 502, 56, 661, 821, 976, 991, 658, 869, 905, 758, 745, 193,
58 768, 550, 608, 933, 378, 286, 215, 979, 792, 961, 61, 688, 793, 644, 986,
59 403, 106, 366, 905, 644, 372, 567, 466, 434, 645, 210, 389, 550, 919, 135,
60 780, 773, 635, 389, 707, 100, 626, 958, 165, 504, 920, 176, 193, 713, 857,
61 265, 203, 50, 668, 108, 645, 990, 626, 197, 510, 357, 358, 850, 858, 364,
62 936, 638,
63];
64
65#[derive(Debug)]
66struct BlockRandomise {
67 n2go: usize,
68 t_pos: usize,
69}
70
71impl BlockRandomise {
72 pub(crate) fn new() -> Self {
73 Self { n2go: 0, t_pos: 0 }
74 }
75
76 pub(crate) fn reset(&mut self) {
77 self.n2go = 0;
78 self.t_pos = 0;
79 }
80
81 pub(crate) fn next(&mut self) -> bool {
82 if self.n2go == 0 {
83 self.n2go = BZ2_R_NUMS[self.t_pos];
84 self.t_pos += 1;
85 if self.t_pos == 512 {
86 self.t_pos = 0;
87 }
88 }
89 self.n2go -= 1;
90 self.n2go == 1
91 }
92}
93
94#[derive(Debug)]
95pub(crate) struct BZip2DecoderBase {
96 block_no: usize,
97 block_size_100k: usize,
98 combined_crc: u32,
99 block_crc: u32,
100 block_crc_digest: BuiltinDigest,
101 tt: Vec<u32>,
102 n_block_used: usize,
103 t_pos: u32,
104 block_randomise: BlockRandomise,
105 block_randomised: bool,
106 result_count: usize,
107 result_wrote_count: usize,
108 result_charactor: u8,
109 stream_no: usize,
110}
111
112impl Default for BZip2DecoderBase {
113 fn default() -> Self {
114 Self::new()
115 }
116}
117
118impl BZip2DecoderBase {
119 const RUN_A: u16 = 0;
120 const RUN_B: u16 = 1;
121
122 pub(crate) fn new() -> Self {
123 Self {
124 block_no: 0,
125 block_size_100k: 0,
126 combined_crc: 0,
127 block_crc: 0,
128 block_crc_digest: IEEE_NORMAL.build_hasher(),
129 tt: Vec::new(),
130 n_block_used: 0,
131 t_pos: 0,
132 block_randomise: BlockRandomise::new(),
133 block_randomised: false,
134 result_count: 0,
135 result_wrote_count: 0,
136 result_charactor: 0,
137 stream_no: 1,
138 }
139 }
140
141 fn read_u8<R: BitRead, I: Iterator<Item = u8>>(
142 reader: &mut R,
143 iter: &mut I,
144 ) -> Result<u8, String> {
145 reader.read_bits(8, iter).map(|x| x.data())
146 }
147
148 fn read_u32<R: BitRead, I: Iterator<Item = u8>>(
149 reader: &mut R,
150 iter: &mut I,
151 ) -> Result<u32, String> {
152 reader.read_bits(32, iter).map(|x| x.data())
153 }
154
155 fn check_u8<R: BitRead, I: Iterator<Item = u8>>(
156 reader: &mut R,
157 iter: &mut I,
158 value: u8,
159 ) -> Result<bool, String> {
160 Self::read_u8(reader, iter).map(|x| x == value)
161 }
162
163 fn init_block<R: BitRead, I: Iterator<Item = u8>>(
164 &mut self,
165 reader: &mut R,
166 iter: &mut I,
167 ) -> Result<bool, BZip2Error> {
168 loop {
169 if self.block_no == 0 {
170 let magic_err = if self.stream_no == 1 {
171 BZip2Error::DataErrorMagicFirst
172 } else {
173 BZip2Error::DataErrorMagic
174 };
175 let _ = Self::check_u8(reader, iter, HEADER_B)
176 .map_err(|_| magic_err)?;
177 let _ = Self::check_u8(reader, iter, HEADER_Z)
178 .map_err(|_| magic_err)?;
179 let _ = Self::check_u8(reader, iter, HEADER_h)
180 .map_err(|_| magic_err)?;
181 self.block_size_100k = {
182 let b = Self::read_u8(reader, iter)
183 .map_err(|_| BZip2Error::UnexpectedEof)?;
184 if b < 1 + HEADER_0 || b > 9 + HEADER_0 {
185 return Err(magic_err);
186 }
187 usize::from(b - HEADER_0)
188 };
189 } else {
190 let data_block_crc = self.block_crc_digest.finish() as u32;
191 debug!(
192 " {{0x{:08x}, 0x{:08x}}}]",
193 self.block_crc, data_block_crc
194 );
195 if data_block_crc != self.block_crc {
196 return Err(BZip2Error::DataError);
197 }
198 self.combined_crc = ((self.combined_crc << 1)
199 | (self.combined_crc >> 31))
200 ^ self.block_crc;
201 self.block_crc_digest = IEEE_NORMAL.build_hasher();
202 }
203
204 let block_head_byte = Self::read_u8(reader, iter)
205 .map_err(|_| BZip2Error::UnexpectedEof)?;
206
207 if block_head_byte == 0x31 {
208 let _ = Self::check_u8(reader, iter, 0x41)
209 .map_err(|_| BZip2Error::DataError)?;
210
211 let _ = Self::check_u8(reader, iter, 0x59)
212 .map_err(|_| BZip2Error::DataError)?;
213
214 let _ = Self::check_u8(reader, iter, 0x26)
215 .map_err(|_| BZip2Error::DataError)?;
216
217 let _ = Self::check_u8(reader, iter, 0x53)
218 .map_err(|_| BZip2Error::DataError)?;
219
220 let _ = Self::check_u8(reader, iter, 0x59)
221 .map_err(|_| BZip2Error::DataError)?;
222 self.block_no += 1;
223 debug!(" [{}: huff+mtf ", self.block_no);
224
225 self.block_crc = Self::read_u32(reader, iter)
226 .map_err(|_| BZip2Error::UnexpectedEof)?;
227 self.block_randomised = reader
228 .read_bits::<u8, _>(1, iter)
229 .map_err(|_| BZip2Error::UnexpectedEof)?
230 .data()
231 == 1;
232
233 let orig_pos = reader
234 .read_bits::<u32, _>(24, iter)
235 .map_err(|_| BZip2Error::UnexpectedEof)?
236 .data() as usize;
237
238 if orig_pos > 10 + 100_000 * self.block_size_100k {
239 return Err(BZip2Error::DataError);
240 }
241
242 let seq2unseq = {
244 let mut in_use16 = BitArray::new(16);
245 for i in 0..16 {
246 in_use16.set(
247 i,
248 reader
249 .read_bits::<u8, _>(1, iter)
250 .map_err(|_| BZip2Error::UnexpectedEof)?
251 .data()
252 == 1,
253 );
254 }
255
256 let mut ret = Vec::with_capacity(256);
257 for (i, _) in
258 in_use16.iter().enumerate().filter(|&(_, x)| x)
259 {
260 for j in 0..16 {
261 if reader
262 .read_bits::<u8, _>(1, iter)
263 .map_err(|_| BZip2Error::UnexpectedEof)?
264 .data()
265 == 1
266 {
267 ret.push(i * 16 + j)
268 }
269 }
270 }
271 ret
272 };
273
274 if seq2unseq.is_empty() {
275 return Err(BZip2Error::DataError);
276 }
277
278 let alpha_size = seq2unseq.len() + 2;
279
280 let n_groups = reader
282 .read_bits(3, iter)
283 .map_err(|_| BZip2Error::UnexpectedEof)?
284 .data();
285 if n_groups < 2 || n_groups > 6 {
286 return Err(BZip2Error::DataError);
287 }
288 let n_selectors = reader
289 .read_bits(15, iter)
290 .map_err(|_| BZip2Error::UnexpectedEof)?
291 .data();
292 if n_selectors < 1 {
293 return Err(BZip2Error::DataError);
294 }
295
296 let mut selector = Vec::with_capacity(n_selectors);
297 {
298 let mut selector_mtf_dec =
299 MtfPositionDecoder::new(n_groups);
300 for _ in 0..n_selectors {
301 let mut j = 0;
302 while reader
303 .read_bits::<u8, _>(1, iter)
304 .map_err(|_| BZip2Error::UnexpectedEof)?
305 .data()
306 != 0
307 {
308 j += 1;
309 if j >= n_groups {
310 return Err(BZip2Error::DataError);
311 }
312 }
313 selector.push(selector_mtf_dec.pop(j));
315 }
316 }
317
318 let mut len = vec![vec![0; alpha_size]; n_groups];
319 for t in &mut len {
321 let mut curr = reader
322 .read_bits::<u8, _>(5, iter)
323 .map_err(|_| BZip2Error::UnexpectedEof)?
324 .data();
325 for i in t.iter_mut() {
326 while reader
327 .read_bits::<u8, _>(1, iter)
328 .map_err(|_| BZip2Error::UnexpectedEof)?
329 .data()
330 != 0
331 {
332 if curr < 1 || curr > 20 {
333 return Err(BZip2Error::DataError);
334 }
335 if reader
336 .read_bits::<u8, _>(1, iter)
337 .map_err(|_| BZip2Error::UnexpectedEof)?
338 .data()
339 == 0
340 {
341 curr += 1;
342 } else {
343 curr -= 1;
344 }
345 }
346 *i = curr;
347 }
348 }
349
350 let mut code = Vec::with_capacity(n_groups);
352 for l in &len {
353 code.push(
354 HuffmanDecoder::<Left>::new(l, 12)
355 .map_err(|_| BZip2Error::DataError)?,
356 );
357 }
358
359 let eob = alpha_size as u16 - 1;
361 let nblock_max = 100_000 * self.block_size_100k;
362
363 let mut unzftab = vec![0; 257]; self.tt.clear();
365 self.tt.reserve_exact(nblock_max);
366
367 {
368 let mut group_no = 0;
369 let mut group_pos = 0;
370 let mut n = 1;
371 let mut es = 0;
372
373 let mut mtf_decoder =
374 MtfPositionDecoder::new(seq2unseq.len());
375
376 loop {
377 if group_pos == 0 {
378 group_no += 1;
379 if group_no > n_selectors {
380 return Err(BZip2Error::DataError);
381 }
382 group_pos = BZ_G_SIZE;
383 }
384 group_pos -= 1;
385 let next_sym = code[selector[group_no - 1]]
386 .dec(reader, iter)
387 .map_err(|_| BZip2Error::DataError)?
388 .ok_or_else(|| BZip2Error::DataError)?;
389
390 if es > 0
391 && next_sym != Self::RUN_A
392 && next_sym != Self::RUN_B
393 {
394 let uc = seq2unseq[mtf_decoder.pop(0)];
395 unzftab[uc + 1] += es;
396 for _ in 0..es {
397 self.tt.push(uc as u32);
398 }
399 if self.tt.len() >= nblock_max {
400 return Err(BZip2Error::DataError);
401 }
402 n = 1;
403 es = 0;
404 }
405
406 if next_sym == eob {
407 break;
408 }
409
410 if n >= 2 * 1024 * 1024 {
417 return Err(BZip2Error::DataError);
418 }
419
420 if next_sym == Self::RUN_A {
421 es += n;
422 n <<= 1;
423 } else if next_sym == Self::RUN_B {
424 n <<= 1;
425 es += n;
426 } else {
427 if self.tt.len() >= nblock_max {
428 return Err(BZip2Error::DataError);
429 }
430
431 let uc = seq2unseq
432 [mtf_decoder.pop(next_sym as usize - 1)];
433 unzftab[uc + 1] += 1;
434 self.tt.push(uc as u32);
435 }
436 }
437 }
438
439 if orig_pos >= self.tt.len() {
442 return Err(BZip2Error::DataError);
443 }
444
445 if unzftab[0] != 0 {
448 return Err(BZip2Error::DataError);
449 }
450
451 for i in 1..unzftab.len() {
452 unzftab[i] += unzftab[i - 1];
456 if unzftab[i - 1] > unzftab[i] {
458 return Err(BZip2Error::DataError);
459 }
460 }
461 if unzftab[unzftab.len() - 1] != self.tt.len() {
463 return Err(BZip2Error::DataError);
464 }
465
466 debug!("rt+rld");
467
468 for i in 0..self.tt.len() {
470 let uc = (self.tt[i] & 0xFF) as usize;
471 self.tt[unzftab[uc]] |= (i as u32) << 8;
472 unzftab[uc] += 1;
473 }
474
475 self.t_pos = self.tt[orig_pos] >> 8;
476 self.n_block_used = 0;
477
478 if self.block_randomised {
479 self.block_randomise.reset();
480 }
481
482 self.result_count = 0;
483 self.result_wrote_count = 0;
484
485 return Ok(true);
486 } else if block_head_byte == 0x17 {
487 let _ = Self::check_u8(reader, iter, 0x72)
488 .map_err(|_| BZip2Error::DataError)?;
489
490 let _ = Self::check_u8(reader, iter, 0x45)
491 .map_err(|_| BZip2Error::DataError)?;
492
493 let _ = Self::check_u8(reader, iter, 0x38)
494 .map_err(|_| BZip2Error::DataError)?;
495
496 let _ = Self::check_u8(reader, iter, 0x50)
497 .map_err(|_| BZip2Error::DataError)?;
498
499 let _ = Self::check_u8(reader, iter, 0x90)
500 .map_err(|_| BZip2Error::DataError)?;
501 let stored_combind_crc = Self::read_u32(reader, iter)
502 .map_err(|_| BZip2Error::UnexpectedEof)?;
503 debug!(
504 " combined CRCs: stored = 0x{:08x}, computed = 0x{:08x}",
505 stored_combind_crc, self.combined_crc
506 );
507 if stored_combind_crc != self.combined_crc {
508 return Err(BZip2Error::DataError);
509 }
510 let _ = reader.skip_to_next_byte();
511 let next = reader
512 .peek_bits::<usize, _>(8, iter)
513 .map_err(|_| BZip2Error::Unexpected)?;
514 if next.len() == 8 {
515 self.block_no = 0;
516 self.combined_crc = 0;
517 self.stream_no += 1;
518 } else {
519 return Ok(false);
520 }
521 } else {
522 return Err(BZip2Error::DataError);
523 }
524 }
525 }
526
527 fn get_next_lfm(&mut self) -> Result<u8, BZip2Error> {
528 let mut position = self.t_pos;
529 if position >= 100_000 * self.block_size_100k as u32 {
531 return Err(BZip2Error::DataError);
532 }
533 position = self.tt[position as usize];
534 let mut k0 = position as u8;
535 self.t_pos = position >> 8;
536 self.n_block_used += 1;
537 if self.block_randomised {
538 k0 ^= if self.block_randomise.next() { 1 } else { 0 };
539 }
540
541 Ok(k0)
542 }
543}
544
545impl BitDecodeService for BZip2DecoderBase {
546 type Direction = Left;
547 type Error = BZip2Error;
548 type Output = u8;
549
550 fn next<I: Iterator<Item = u8>>(
551 &mut self,
552 reader: &mut BitReader<Self::Direction>,
553 iter: &mut I,
554 ) -> Result<Option<u8>, Self::Error> {
555 if self.result_count == self.result_wrote_count {
556 if self.n_block_used == self.tt.len()
557 && !self.init_block(reader, iter)?
558 {
559 return Ok(None);
560 }
561
562 let buffer = self.get_next_lfm()?;
563 if buffer == self.result_charactor && self.result_count < 4 {
564 self.result_count += 1;
565 self.result_wrote_count += 1;
566 } else {
567 self.result_charactor = buffer;
568 self.result_count = 1;
569 self.result_wrote_count = 1;
570 }
571
572 if self.result_count == 4 {
573 self.result_count += usize::from(self.get_next_lfm()?);
574 }
575 } else {
576 self.result_wrote_count += 1;
577 }
578 self.block_crc_digest.write_u8(self.result_charactor);
579 Ok(Some(self.result_charactor))
580 }
581}
582
583#[derive(Debug)]
584pub struct BZip2Decoder {
585 inner: BitDecoderImpl<BZip2DecoderBase>,
586}
587
588impl BZip2Decoder {
589 pub fn new() -> Self {
590 Self {
591 inner: BitDecoderImpl::<BZip2DecoderBase>::new(),
592 }
593 }
594}
595
596impl Default for BZip2Decoder {
597 fn default() -> Self {
598 Self {
599 inner: BitDecoderImpl::<BZip2DecoderBase>::new(),
600 }
601 }
602}
603
604impl Decoder for BZip2Decoder {
605 type Input = u8;
606 type Output = u8;
607 type Error = BZip2Error;
608
609 fn next<I: Iterator<Item = Self::Input>>(
610 &mut self,
611 iter: &mut I,
612 ) -> Option<Result<Self::Output, Self::Error>> {
613 self.inner.next(iter)
614 }
615}