1use crate::{Error, Result};
2use std::io::{Read, Write};
3
4const MATCH_HASH_BUCKETS: usize = 4096;
5const MAX_LONG_MATCH_CANDIDATES: usize = 64;
6const MAX_LONG_LZ_DISTANCE: usize = 0x7fff;
7
8const DEC_L1: &[u16] = &[
9 0x8000, 0xa000, 0xc000, 0xd000, 0xe000, 0xea00, 0xee00, 0xf000, 0xf200, 0xf200, 0xffff,
10];
11const POS_L1: &[u16] = &[0, 0, 0, 2, 3, 5, 7, 11, 16, 20, 24, 32, 32];
12const DEC_L2: &[u16] = &[
13 0xa000, 0xc000, 0xd000, 0xe000, 0xea00, 0xee00, 0xf000, 0xf200, 0xf240, 0xffff,
14];
15const POS_L2: &[u16] = &[0, 0, 0, 0, 5, 7, 9, 13, 18, 22, 26, 34, 36];
16const DEC_HF0: &[u16] = &[
17 0x8000, 0xc000, 0xe000, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xffff,
18];
19const POS_HF0: &[u16] = &[0, 0, 0, 0, 0, 8, 16, 24, 33, 33, 33, 33, 33];
20const DEC_HF1: &[u16] = &[
21 0x2000, 0xc000, 0xe000, 0xf000, 0xf200, 0xf200, 0xf7e0, 0xffff,
22];
23const POS_HF1: &[u16] = &[0, 0, 0, 0, 0, 0, 4, 44, 60, 76, 80, 80, 127];
24const DEC_HF2: &[u16] = &[
25 0x1000, 0x2400, 0x8000, 0xc000, 0xfa00, 0xffff, 0xffff, 0xffff,
26];
27const POS_HF2: &[u16] = &[0, 0, 0, 0, 0, 0, 2, 7, 53, 117, 233, 0, 0];
28const DEC_HF3: &[u16] = &[0x0800, 0x2400, 0xee00, 0xfe80, 0xffff, 0xffff, 0xffff];
29const POS_HF3: &[u16] = &[0, 0, 0, 0, 0, 0, 0, 2, 16, 218, 251, 0, 0];
30const DEC_HF4: &[u16] = &[0xff00, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff];
31const POS_HF4: &[u16] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0];
32
33const SHORT_LEN1: [u8; 16] = [1, 3, 4, 4, 5, 6, 7, 8, 8, 4, 4, 5, 6, 6, 4, 0];
34const SHORT_XOR1: [u8; 15] = [
35 0x00, 0xa0, 0xd0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff, 0xc0, 0x80, 0x90, 0x98, 0x9c, 0xb0,
36];
37const SHORT_LEN2: [u8; 16] = [2, 3, 3, 3, 4, 4, 5, 6, 6, 4, 4, 5, 6, 6, 4, 0];
38const SHORT_XOR2: [u8; 15] = [
39 0x00, 0x40, 0x60, 0xa0, 0xd0, 0xe0, 0xf0, 0xf8, 0xfc, 0xc0, 0x80, 0x90, 0x98, 0x9c, 0xb0,
40];
41
42pub fn unpack15_encode(input: &[u8]) -> Result<Vec<u8>> {
43 unpack15_encode_with_options(input, EncodeOptions::default())
44}
45
46pub fn unpack15_encode_with_options(input: &[u8], options: EncodeOptions) -> Result<Vec<u8>> {
47 if input.is_empty() {
48 return Ok(Vec::new());
49 }
50
51 let mut encoder = Unpack15Encoder::with_options(options);
52 encoder.encode_member(input)
53}
54
55pub fn unpack15_decode(input: &[u8], output_size: usize) -> Result<Vec<u8>> {
56 let mut decoder = Unpack15::new();
57 decoder.decode_member(input, output_size, false)
58}
59
60pub struct Unpack15Encoder {
61 bits: BitWriter,
62 options: EncodeOptions,
63 ch_set: [u16; 256],
66 ch_set_c: [u16; 256],
67 ch_set_b: [u16; 256],
68 n_to_pl: [u8; 256],
69 n_to_pl_b: [u8; 256],
70 n_to_pl_c: [u8; 256],
71 ch_set_a: [u16; 256],
72 avr_plc: u32,
73 avr_plc_b: u32,
74 avr_ln1: u32,
75 avr_ln2: u32,
76 avr_ln3: u32,
77 max_dist3: u32,
78 nhfb: u32,
79 nlzb: u32,
80 num_huf: u32,
81 old_dist: [u32; 4],
82 old_dist_ptr: usize,
83 last_dist: u32,
84 last_length: u32,
85 l_count: u32,
86 #[cfg(test)]
87 stmode_literal_count: usize,
88}
89
90#[derive(Debug, Clone, Copy, PartialEq, Eq)]
91pub struct EncodeOptions {
92 old_distance_tokens: bool,
93 lazy_matching: bool,
94 stmode_literal_runs: bool,
95 max_long_match_distance: usize,
96}
97
98impl EncodeOptions {
99 pub const fn new() -> Self {
100 Self {
101 old_distance_tokens: true,
102 lazy_matching: true,
103 stmode_literal_runs: true,
104 max_long_match_distance: MAX_LONG_LZ_DISTANCE,
105 }
106 }
107
108 pub const fn with_old_distance_tokens(mut self, enabled: bool) -> Self {
109 self.old_distance_tokens = enabled;
110 self
111 }
112
113 pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
114 self.lazy_matching = enabled;
115 self
116 }
117
118 pub const fn with_stmode_literal_runs(mut self, enabled: bool) -> Self {
119 self.stmode_literal_runs = enabled;
120 self
121 }
122
123 pub const fn with_max_long_match_distance(mut self, distance: usize) -> Self {
124 self.max_long_match_distance = distance;
125 self
126 }
127
128 pub const fn old_distance_tokens_enabled(self) -> bool {
129 self.old_distance_tokens
130 }
131}
132
133impl Default for EncodeOptions {
134 fn default() -> Self {
135 Self::new()
136 }
137}
138
139impl Unpack15Encoder {
140 pub fn new() -> Self {
141 Self::with_options(EncodeOptions::default())
142 }
143
144 pub fn with_options(options: EncodeOptions) -> Self {
145 let mut encoder = Self {
146 bits: BitWriter::new(),
147 options,
148 ch_set: [0; 256],
149 ch_set_c: [0; 256],
150 ch_set_b: [0; 256],
151 n_to_pl: [0; 256],
152 n_to_pl_b: [0; 256],
153 n_to_pl_c: [0; 256],
154 ch_set_a: [0; 256],
155 avr_plc: 0x3500,
156 avr_plc_b: 0,
157 avr_ln1: 0,
158 avr_ln2: 0,
159 avr_ln3: 0,
160 max_dist3: 0x2001,
161 nhfb: 0x80,
162 nlzb: 0x80,
163 num_huf: 0,
164 old_dist: [u32::MAX; 4],
165 old_dist_ptr: 0,
166 last_dist: u32::MAX,
167 last_length: 0,
168 l_count: 0,
169 #[cfg(test)]
170 stmode_literal_count: 0,
171 };
172 encoder.init_huff();
173 encoder
174 }
175
176 pub fn encode_literals_only(mut self, input: &[u8]) -> Result<Vec<u8>> {
177 self.encode_literals_only_member(input)
178 }
179
180 fn encode_literals_only_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
181 if input.is_empty() {
182 return Ok(Vec::new());
183 }
184 self.bits = BitWriter::new();
185 let mut pos = 0usize;
186 while pos < input.len() {
187 let mut flags = 0u8;
188 let mut flag_bits = 0usize;
189 let mut payloads = Vec::new();
190 let mut plan_nhfb = self.nhfb;
191 let mut plan_nlzb = self.nlzb;
192 let mut plan_num_huf = self.num_huf;
193 let mut group_enters_stmode = false;
194
195 while flag_bits < 8 && pos < input.len() {
196 let flag = huff_flag_bits(plan_nlzb <= plan_nhfb);
197 if flag_bits + flag.len() > 8 {
198 break;
199 }
200 write_planned_flag_bits(&mut flags, flag_bits, flag);
201 payloads.push(EncodedToken::Literal(input[pos]));
202 flag_bits += flag.len();
203 if flag_bits == 8 && plan_num_huf >= 16 && pos + 1 < input.len() {
204 group_enters_stmode = true;
205 }
206 plan_num_huf += 1;
207 pos += 1;
208 plan_huff_effect(&mut plan_nhfb, &mut plan_nlzb);
209 }
210
211 self.emit_flags_byte(flags)?;
212 self.emit_payloads(payloads)?;
213 if group_enters_stmode {
214 if self.options.stmode_literal_runs {
215 self.emit_stmode_literal_run(input, &[], &mut pos, false)?;
216 }
217 self.emit_stmode_exit()?;
218 }
219 }
220 Ok(std::mem::take(&mut self.bits).finish())
221 }
222
223 pub fn encode_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
224 if input.is_empty() {
225 return Ok(Vec::new());
226 }
227 self.bits = BitWriter::new();
228 let buckets = long_lz_buckets(input);
229 let mut pos = 0usize;
230 while pos < input.len() {
231 let mut flags = 0u8;
232 let mut flag_bits = 0usize;
233 let mut payloads = Vec::new();
234 let mut plan_encoder = self.clone_for_planning();
235 let mut group_enters_stmode = false;
236
237 while flag_bits < 8 && pos < input.len() {
238 let state = plan_encoder.lz_plan_state();
239 if let Some(token) = plan_encoder
240 .choose_lz_token(input, pos, &buckets, state, flag_bits)
241 .filter(|token| {
242 !self.options.lazy_matching
243 || !should_lazy_emit_literal(
244 input,
245 pos,
246 &buckets,
247 *token,
248 state.max_dist3,
249 self.options,
250 )
251 })
252 {
253 let flag = token.flag_bits(state.nlzb, state.nhfb);
254 let next_pos = pos + token.length() as usize;
255 let leaves_unfillable_flag_bit =
256 flag_bits + flag.len() == 7 && next_pos < input.len();
257 if flag_bits + flag.len() <= 8 && !leaves_unfillable_flag_bit {
258 write_planned_flag_bits(&mut flags, flag_bits, flag);
259 flag_bits += flag.len();
260 pos = next_pos;
261 plan_encoder.emit_payloads(vec![token])?;
262 payloads.push(token);
263 continue;
264 }
265 }
266
267 let flag = huff_flag_bits(plan_encoder.nlzb <= plan_encoder.nhfb);
268 if flag_bits + flag.len() > 8 {
269 break;
270 }
271 write_planned_flag_bits(&mut flags, flag_bits, flag);
272 let literal = input[pos];
273 payloads.push(EncodedToken::Literal(input[pos]));
274 flag_bits += flag.len();
275 if flag_bits == 8 && plan_encoder.num_huf >= 16 && pos + 1 < input.len() {
276 group_enters_stmode = true;
277 }
278 pos += 1;
279 plan_encoder.emit_literal(literal)?;
280 }
281
282 self.emit_flags_byte(flags)?;
283 self.emit_payloads(payloads)?;
284 if group_enters_stmode {
285 if self.options.stmode_literal_runs {
286 self.emit_stmode_literal_run(input, &buckets, &mut pos, true)?;
287 }
288 self.emit_stmode_exit()?;
289 }
290 }
291 Ok(std::mem::take(&mut self.bits).finish())
292 }
293
294 fn clone_for_planning(&self) -> Self {
295 Self {
296 bits: BitWriter::new(),
297 options: self.options,
298 ch_set: self.ch_set,
299 ch_set_c: self.ch_set_c,
300 ch_set_b: self.ch_set_b,
301 n_to_pl: self.n_to_pl,
302 n_to_pl_b: self.n_to_pl_b,
303 n_to_pl_c: self.n_to_pl_c,
304 ch_set_a: self.ch_set_a,
305 avr_plc: self.avr_plc,
306 avr_plc_b: self.avr_plc_b,
307 avr_ln1: self.avr_ln1,
308 avr_ln2: self.avr_ln2,
309 avr_ln3: self.avr_ln3,
310 max_dist3: self.max_dist3,
311 nhfb: self.nhfb,
312 nlzb: self.nlzb,
313 num_huf: self.num_huf,
314 old_dist: self.old_dist,
315 old_dist_ptr: self.old_dist_ptr,
316 last_dist: self.last_dist,
317 last_length: self.last_length,
318 l_count: self.l_count,
319 #[cfg(test)]
320 stmode_literal_count: self.stmode_literal_count,
321 }
322 }
323
324 fn lz_plan_state(&self) -> LzPlanState {
325 LzPlanState {
326 last_dist: self.last_dist,
327 last_length: self.last_length,
328 old_dist: self.old_dist,
329 old_dist_ptr: self.old_dist_ptr,
330 max_dist3: self.max_dist3,
331 nlzb: self.nlzb,
332 nhfb: self.nhfb,
333 l_count: self.l_count,
334 }
335 }
336
337 fn choose_lz_token(
338 &self,
339 input: &[u8],
340 pos: usize,
341 buckets: &[Vec<usize>],
342 state: LzPlanState,
343 flag_bits: usize,
344 ) -> Option<EncodedToken> {
345 let candidates = find_lz_tokens(input, pos, buckets, state, self.options);
346 candidates
347 .into_iter()
348 .filter(|token| {
349 let flag_len = token.flag_bits(state.nlzb, state.nhfb).len();
350 let next_pos = pos + token.length() as usize;
351 next_pos == input.len()
352 || (flag_bits + flag_len != 7
353 && !(flag_len == 1 && flag_bits.is_multiple_of(2)))
354 })
355 .filter_map(|token| self.token_bit_cost(token, state).map(|cost| (token, cost)))
356 .min_by(|(left, left_cost), (right, right_cost)| {
357 let left_score = left_cost * 256 / left.length() as usize;
358 let right_score = right_cost * 256 / right.length() as usize;
359 left_score
360 .cmp(&right_score)
361 .then_with(|| right.length().cmp(&left.length()))
362 })
363 .map(|(token, _)| token)
364 }
365
366 fn token_bit_cost(&self, token: EncodedToken, state: LzPlanState) -> Option<usize> {
367 let flag_cost = token.flag_bits(state.nlzb, state.nhfb).len();
368 match token {
369 EncodedToken::Literal(byte) => {
370 let place = self
371 .ch_set
372 .iter()
373 .position(|&value| (value >> 8) as u8 == byte)?;
374 Some(flag_cost + self.literal_place_bit_cost(place)?)
375 }
376 EncodedToken::RepeatLast(_) => {
377 Some(flag_cost + self.repeat_last_bit_cost(state.l_count))
378 }
379 EncodedToken::ShortLz(token) => {
380 let distance_value = token.distance.checked_sub(1)?;
381 let distance_place = self
382 .ch_set_a
383 .iter()
384 .position(|&value| value as u32 == distance_value)?;
385 Some(
386 flag_cost
387 + l_count_break_bit_cost(state.l_count)
388 + self.short_lz_prefix_bit_cost(token.length - 2)?
389 + decode_num_bit_cost(distance_place as u32, 5, DEC_HF2, POS_HF2)?,
390 )
391 }
392 EncodedToken::OldDist(token) => {
393 let length_code = old_dist_lz_length_code(
394 token.length,
395 token.distance,
396 state.max_dist3,
397 token.short_code,
398 )?;
399 Some(
400 flag_cost
401 + l_count_break_bit_cost(state.l_count)
402 + self.short_lz_prefix_bit_cost(token.short_code)?
403 + decode_num_bit_cost(length_code, 2, DEC_L1, POS_L1)?,
404 )
405 }
406 EncodedToken::LongLz(token) => {
407 let length_code = long_lz_length_code_for_distance(token, state.max_dist3)?;
408 let distance_place = self.long_lz_distance_place(token.distance).ok()?;
409 Some(
410 flag_cost
411 + self.long_lz_length_bit_cost(length_code)?
412 + self.long_lz_distance_bit_cost(distance_place)?
413 + 7,
414 )
415 }
416 }
417 }
418
419 fn emit_payloads(&mut self, payloads: Vec<EncodedToken>) -> Result<()> {
420 for payload in payloads {
421 match payload {
422 EncodedToken::Literal(byte) => self.emit_literal(byte)?,
423 EncodedToken::ShortLz(short_lz) => {
424 self.emit_short_lz(short_lz)?;
425 }
426 EncodedToken::RepeatLast(repeat) => {
427 self.emit_repeat_last(repeat)?;
428 }
429 EncodedToken::OldDist(old_lz) => {
430 self.emit_old_dist_lz(old_lz)?;
431 }
432 EncodedToken::LongLz(long_lz) => {
433 self.emit_long_lz(long_lz)?;
434 }
435 }
436 }
437
438 Ok(())
439 }
440
441 fn emit_flags_byte(&mut self, flags: u8) -> Result<()> {
442 let flags_place = self
443 .ch_set_c
444 .iter()
445 .position(|&value| (value >> 8) as u8 == flags)
446 .ok_or(Error::InvalidData("RAR 1.3 flag byte is not encodable"))?;
447 emit_decode_num(&mut self.bits, flags_place as u32, 5, DEC_HF2, POS_HF2)?;
448
449 let mut cur_flags;
450 let mut new_flags_place;
451 loop {
452 cur_flags = self.ch_set_c[flags_place] as u32;
453 new_flags_place = self.n_to_pl_c[(cur_flags & 0xff) as usize] as usize;
454 self.n_to_pl_c[(cur_flags & 0xff) as usize] =
455 self.n_to_pl_c[(cur_flags & 0xff) as usize].wrapping_add(1);
456 cur_flags += 1;
457 if cur_flags & 0xff == 0 {
458 corr_huff(&mut self.ch_set_c, &mut self.n_to_pl_c);
459 } else {
460 break;
461 }
462 }
463
464 self.ch_set_c[flags_place] = self.ch_set_c[new_flags_place];
465 self.ch_set_c[new_flags_place] = cur_flags as u16;
466 Ok(())
467 }
468
469 fn emit_literal(&mut self, byte: u8) -> Result<()> {
470 let byte_place = self
471 .ch_set
472 .iter()
473 .position(|&value| (value >> 8) as u8 == byte)
474 .ok_or(Error::InvalidData("RAR 1.3 literal is not encodable"))?;
475 self.emit_literal_place(byte_place, byte_place, true)
476 }
477
478 fn emit_stmode_literal(&mut self, byte: u8) -> Result<()> {
479 let byte_place = self
480 .ch_set
481 .iter()
482 .position(|&value| (value >> 8) as u8 == byte)
483 .ok_or(Error::InvalidData(
484 "RAR 1.3 stmode literal is not encodable",
485 ))?;
486 #[cfg(test)]
487 {
488 self.stmode_literal_count += 1;
489 }
490 self.emit_literal_place(byte_place + 1, byte_place, false)
491 }
492
493 fn emit_stmode_literal_run(
494 &mut self,
495 input: &[u8],
496 buckets: &[Vec<usize>],
497 pos: &mut usize,
498 allow_lz: bool,
499 ) -> Result<()> {
500 while *pos + 1 < input.len() {
501 if allow_lz
502 && find_lz_token(
503 input,
504 *pos,
505 buckets,
506 LzPlanState {
507 last_dist: self.last_dist,
508 last_length: self.last_length,
509 old_dist: self.old_dist,
510 old_dist_ptr: self.old_dist_ptr,
511 max_dist3: self.max_dist3,
512 nlzb: self.nlzb,
513 nhfb: self.nhfb,
514 l_count: self.l_count,
515 },
516 self.options,
517 )
518 .is_some()
519 {
520 break;
521 }
522 self.emit_stmode_literal(input[*pos])?;
523 *pos += 1;
524 }
525 Ok(())
526 }
527
528 fn emit_literal_place(
529 &mut self,
530 encoded_place: usize,
531 decoded_place: usize,
532 update_num_huf: bool,
533 ) -> Result<()> {
534 if encoded_place > self.ch_set.len() || decoded_place >= self.ch_set.len() {
535 return Err(Error::InvalidData("RAR 1.3 literal is not encodable"));
536 }
537
538 let (start_pos, dec_tab, pos_tab) = if self.avr_plc > 0x75ff {
539 (8, DEC_HF4, POS_HF4)
540 } else if self.avr_plc > 0x5dff {
541 (6, DEC_HF3, POS_HF3)
542 } else if self.avr_plc > 0x35ff {
543 (5, DEC_HF2, POS_HF2)
544 } else if self.avr_plc > 0x0dff {
545 (5, DEC_HF1, POS_HF1)
546 } else {
547 (4, DEC_HF0, POS_HF0)
548 };
549 emit_decode_num(
550 &mut self.bits,
551 encoded_place as u32,
552 start_pos,
553 dec_tab,
554 pos_tab,
555 )?;
556
557 self.avr_plc += decoded_place as u32;
558 self.avr_plc -= self.avr_plc >> 8;
559 self.nhfb += 16;
560 if self.nhfb > 0xff {
561 self.nhfb = 0x90;
562 self.nlzb >>= 1;
563 }
564 if update_num_huf {
565 self.num_huf += 1;
566 }
567
568 let idx = decoded_place;
569 let mut cur_byte;
570 let mut new_byte_place;
571 loop {
572 cur_byte = self.ch_set[idx] as u32;
573 new_byte_place = self.n_to_pl[(cur_byte & 0xff) as usize] as usize;
574 self.n_to_pl[(cur_byte & 0xff) as usize] =
575 self.n_to_pl[(cur_byte & 0xff) as usize].wrapping_add(1);
576 cur_byte += 1;
577 if cur_byte & 0xff > 0xa1 {
578 corr_huff(&mut self.ch_set, &mut self.n_to_pl);
579 } else {
580 break;
581 }
582 }
583
584 self.ch_set[idx] = self.ch_set[new_byte_place];
585 self.ch_set[new_byte_place] = cur_byte as u16;
586 Ok(())
587 }
588
589 fn emit_short_lz(&mut self, short_lz: ShortLz) -> Result<()> {
590 self.num_huf = 0;
591 if self.l_count == 2 {
592 self.bits.write_bits(0, 1);
593 self.l_count = 0;
594 }
595 let length_place = short_lz.length - 2;
596 self.emit_short_lz_code(length_place as usize)?;
597 self.l_count = 0;
598
599 self.avr_ln1 += length_place;
600 self.avr_ln1 -= self.avr_ln1 >> 4;
601
602 let distance_value = short_lz.distance - 1;
603 let distance_place = self
604 .ch_set_a
605 .iter()
606 .position(|&value| value as u32 == distance_value)
607 .ok_or(Error::InvalidData(
608 "RAR 1.3 ShortLZ distance is not encodable",
609 ))?;
610 emit_decode_num(&mut self.bits, distance_place as u32, 5, DEC_HF2, POS_HF2)?;
611 if distance_place > 0 {
612 let last_distance = self.ch_set_a[distance_place - 1];
613 self.ch_set_a[distance_place] = last_distance;
614 self.ch_set_a[distance_place - 1] = distance_value as u16;
615 }
616 self.remember_match(short_lz.distance, short_lz.length);
617 Ok(())
618 }
619
620 fn emit_repeat_last(&mut self, repeat: RepeatLastLz) -> Result<()> {
621 if self.last_dist != repeat.distance || self.last_length != repeat.length {
622 return Err(Error::InvalidData(
623 "RAR 1.3 repeat-last state is not encodable",
624 ));
625 }
626 self.num_huf = 0;
627 if self.l_count == 2 {
628 self.bits.write_bits(1, 1);
629 } else {
630 self.emit_short_lz_code(9)?;
631 self.l_count += 1;
632 }
633 Ok(())
634 }
635
636 fn emit_old_dist_lz(&mut self, old_lz: OldDistLz) -> Result<()> {
637 self.num_huf = 0;
638 if self.l_count == 2 {
639 self.bits.write_bits(0, 1);
640 self.l_count = 0;
641 }
642 self.emit_short_lz_code(old_lz.short_code as usize)?;
643 self.l_count = 0;
644
645 let expected_distance = self.old_dist[(self
646 .old_dist_ptr
647 .wrapping_sub((old_lz.short_code - 9) as usize))
648 & 3];
649 if expected_distance != old_lz.distance {
650 return Err(Error::InvalidData(
651 "RAR 1.3 old-distance state is not encodable",
652 ));
653 }
654 let length_code = old_dist_lz_length_code(
655 old_lz.length,
656 old_lz.distance,
657 self.max_dist3,
658 old_lz.short_code,
659 )
660 .ok_or(Error::InvalidData(
661 "RAR 1.3 old-distance length is not encodable",
662 ))?;
663 emit_decode_num(&mut self.bits, length_code, 2, DEC_L1, POS_L1)?;
664 self.remember_match(old_lz.distance, old_lz.length);
665 Ok(())
666 }
667
668 fn emit_short_lz_code(&mut self, code: usize) -> Result<()> {
669 let (code_len, code_byte) = if self.avr_ln1 < 37 {
670 (self.short_len1(code), SHORT_XOR1[code])
671 } else {
672 (self.short_len2(code), SHORT_XOR2[code])
673 };
674 self.bits
675 .write_bits((code_byte >> (8 - code_len)) as u32, code_len as usize);
676 Ok(())
677 }
678
679 fn short_lz_prefix_bit_cost(&self, code: u32) -> Option<usize> {
680 let code = usize::try_from(code).ok()?;
681 if code >= SHORT_XOR1.len() {
682 return None;
683 }
684 Some(if self.avr_ln1 < 37 {
685 self.short_len1(code)
686 } else {
687 self.short_len2(code)
688 } as usize)
689 }
690
691 fn repeat_last_bit_cost(&self, l_count: u32) -> usize {
692 if l_count == 2 {
693 1
694 } else {
695 self.short_lz_prefix_bit_cost(9)
696 .expect("repeat-last code is encodable")
697 }
698 }
699
700 fn emit_long_lz(&mut self, long_lz: LongLz) -> Result<()> {
701 self.num_huf = 0;
702 self.nlzb += 16;
703 if self.nlzb > 0xff {
704 self.nlzb = 0x90;
705 self.nhfb >>= 1;
706 }
707 let old_avr2 = self.avr_ln2;
708
709 let length_code = self.long_lz_length_code(long_lz).ok_or(Error::InvalidData(
710 "RAR 1.3 LongLZ match length is not encodable for distance",
711 ))?;
712 emit_long_lz_length(&mut self.bits, self.avr_ln2, length_code)?;
713 self.avr_ln2 += length_code;
714 self.avr_ln2 -= self.avr_ln2 >> 5;
715
716 let distance_place = self.long_lz_distance_place(long_lz.distance)?;
717 let (start_pos, dec_tab, pos_tab) = if self.avr_plc_b > 0x28ff {
718 (5, DEC_HF2, POS_HF2)
719 } else if self.avr_plc_b > 0x06ff {
720 (5, DEC_HF1, POS_HF1)
721 } else {
722 (4, DEC_HF0, POS_HF0)
723 };
724 emit_decode_num(
725 &mut self.bits,
726 distance_place as u32,
727 start_pos,
728 dec_tab,
729 pos_tab,
730 )?;
731 self.avr_plc_b += distance_place as u32;
732 self.avr_plc_b -= self.avr_plc_b >> 8;
733
734 let idx = distance_place;
735 let mut distance;
736 let mut new_distance_place;
737 loop {
738 distance = self.ch_set_b[idx] as u32;
739 new_distance_place = self.n_to_pl_b[(distance & 0xff) as usize] as usize;
740 self.n_to_pl_b[(distance & 0xff) as usize] =
741 self.n_to_pl_b[(distance & 0xff) as usize].wrapping_add(1);
742 distance += 1;
743 if distance & 0xff == 0 {
744 corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
745 } else {
746 break;
747 }
748 }
749
750 self.ch_set_b[idx] = self.ch_set_b[new_distance_place];
751 self.ch_set_b[new_distance_place] = distance as u16;
752
753 let low_byte = ((long_lz.distance << 1) & 0xff) as u8;
754 self.bits.write_bits((low_byte >> 1) as u32, 7);
755
756 let old_avr3 = self.avr_ln3;
757 if length_code != 1 && length_code != 4 {
758 if length_code == 0 && long_lz.distance <= self.max_dist3 {
759 self.avr_ln3 += 1;
760 self.avr_ln3 -= self.avr_ln3 >> 8;
761 } else if self.avr_ln3 > 0 {
762 self.avr_ln3 -= 1;
763 }
764 }
765 if old_avr3 > 0xb0 || (self.avr_plc >= 0x2a00 && old_avr2 < 0x40) {
766 self.max_dist3 = 0x7f00;
767 } else {
768 self.max_dist3 = 0x2001;
769 }
770
771 self.remember_match(long_lz.distance, long_lz.length);
772 Ok(())
773 }
774
775 fn long_lz_length_code(&self, long_lz: LongLz) -> Option<u32> {
776 long_lz_length_code_for_distance(long_lz, self.max_dist3)
777 }
778
779 fn long_lz_distance_place(&self, target_distance: u32) -> Result<usize> {
780 let wanted_high = ((target_distance << 1) & 0xff00) as u16;
781 self.ch_set_b
782 .iter()
783 .position(|&value| value & 0xff00 == wanted_high)
784 .ok_or(Error::InvalidData(
785 "RAR 1.3 LongLZ distance is not encodable",
786 ))
787 }
788
789 fn literal_place_bit_cost(&self, place: usize) -> Option<usize> {
790 if self.avr_plc > 0x75ff {
791 decode_num_bit_cost(place as u32, 8, DEC_HF4, POS_HF4)
792 } else if self.avr_plc > 0x5dff {
793 decode_num_bit_cost(place as u32, 6, DEC_HF3, POS_HF3)
794 } else if self.avr_plc > 0x35ff {
795 decode_num_bit_cost(place as u32, 5, DEC_HF2, POS_HF2)
796 } else if self.avr_plc > 0x0dff {
797 decode_num_bit_cost(place as u32, 5, DEC_HF1, POS_HF1)
798 } else {
799 decode_num_bit_cost(place as u32, 4, DEC_HF0, POS_HF0)
800 }
801 }
802
803 fn long_lz_length_bit_cost(&self, length_code: u32) -> Option<usize> {
804 if self.avr_ln2 >= 122 {
805 decode_num_bit_cost(length_code, 3, DEC_L2, POS_L2)
806 } else if self.avr_ln2 >= 64 {
807 decode_num_bit_cost(length_code, 2, DEC_L1, POS_L1)
808 } else if length_code <= 7 {
809 Some(length_code as usize + 1)
810 } else if length_code < 0x100 {
811 Some(16)
812 } else {
813 None
814 }
815 }
816
817 fn long_lz_distance_bit_cost(&self, distance_place: usize) -> Option<usize> {
818 if self.avr_plc_b > 0x28ff {
819 decode_num_bit_cost(distance_place as u32, 5, DEC_HF2, POS_HF2)
820 } else if self.avr_plc_b > 0x06ff {
821 decode_num_bit_cost(distance_place as u32, 5, DEC_HF1, POS_HF1)
822 } else {
823 decode_num_bit_cost(distance_place as u32, 4, DEC_HF0, POS_HF0)
824 }
825 }
826
827 fn emit_stmode_exit(&mut self) -> Result<()> {
828 let (start_pos, dec_tab, pos_tab) = if self.avr_plc > 0x75ff {
829 (8, DEC_HF4, POS_HF4)
830 } else if self.avr_plc > 0x5dff {
831 (6, DEC_HF3, POS_HF3)
832 } else if self.avr_plc > 0x35ff {
833 (5, DEC_HF2, POS_HF2)
834 } else if self.avr_plc > 0x0dff {
835 (5, DEC_HF1, POS_HF1)
836 } else {
837 (4, DEC_HF0, POS_HF0)
838 };
839 emit_decode_num(&mut self.bits, 0, start_pos, dec_tab, pos_tab)?;
840 self.bits.write_bits(1, 1);
841 self.num_huf = 0;
842 Ok(())
843 }
844
845 fn init_huff(&mut self) {
846 for i in 0..256 {
847 self.ch_set[i] = (i as u16) << 8;
848 self.ch_set_c[i] = (0u8.wrapping_sub(i as u8) as u16) << 8;
849 self.ch_set_b[i] = (i as u16) << 8;
850 }
851 self.n_to_pl = [0; 256];
852 self.n_to_pl_b = [0; 256];
853 self.n_to_pl_c = [0; 256];
854 for i in 0..256 {
855 self.ch_set_a[i] = i as u16;
856 }
857 corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
858 }
859
860 fn remember_match(&mut self, distance: u32, length: u32) {
861 self.old_dist[self.old_dist_ptr] = distance;
862 self.old_dist_ptr = (self.old_dist_ptr + 1) & 3;
863 self.last_length = length;
864 self.last_dist = distance;
865 }
866
867 fn short_len1(&self, pos: usize) -> u8 {
868 if pos == 1 {
869 3
870 } else {
871 SHORT_LEN1[pos]
872 }
873 }
874
875 fn short_len2(&self, pos: usize) -> u8 {
876 if pos == 3 {
877 3
878 } else {
879 SHORT_LEN2[pos]
880 }
881 }
882}
883
884impl Default for Unpack15Encoder {
885 fn default() -> Self {
886 Self::new()
887 }
888}
889
890#[derive(Debug, Clone, Copy)]
891struct LzPlanState {
892 last_dist: u32,
893 last_length: u32,
894 old_dist: [u32; 4],
895 old_dist_ptr: usize,
896 max_dist3: u32,
897 nlzb: u32,
898 nhfb: u32,
899 l_count: u32,
900}
901
902#[derive(Debug, Clone, Copy, PartialEq, Eq)]
903enum EncodedToken {
904 Literal(u8),
905 ShortLz(ShortLz),
906 RepeatLast(RepeatLastLz),
907 OldDist(OldDistLz),
908 LongLz(LongLz),
909}
910
911impl EncodedToken {
912 fn length(self) -> u32 {
913 match self {
914 Self::Literal(_) => 1,
915 Self::ShortLz(token) => token.length,
916 Self::RepeatLast(token) => token.length,
917 Self::OldDist(token) => token.length,
918 Self::LongLz(token) => token.length,
919 }
920 }
921
922 fn flag_bits(self, nlzb: u32, nhfb: u32) -> &'static [bool] {
923 match self {
924 Self::Literal(_) => huff_flag_bits(nlzb <= nhfb),
925 Self::LongLz(_) => long_lz_flag_bits(nlzb > nhfb),
926 Self::ShortLz(_) | Self::RepeatLast(_) | Self::OldDist(_) => &[false, false],
927 }
928 }
929}
930
931#[derive(Debug, Clone, Copy, PartialEq, Eq)]
932struct ShortLz {
933 pub distance: u32,
934 pub length: u32,
935}
936
937#[derive(Debug, Clone, Copy, PartialEq, Eq)]
938struct RepeatLastLz {
939 pub distance: u32,
940 pub length: u32,
941}
942
943#[derive(Debug, Clone, Copy, PartialEq, Eq)]
944struct OldDistLz {
945 pub distance: u32,
946 pub length: u32,
947 pub short_code: u32,
948}
949
950#[derive(Debug, Clone, Copy, PartialEq, Eq)]
951pub struct LongLz {
952 pub distance: u32,
953 pub length: u32,
954}
955
956fn huff_flag_bits(prefer_huff_on_one: bool) -> &'static [bool] {
957 if prefer_huff_on_one {
958 &[true]
959 } else {
960 &[false, true]
961 }
962}
963
964fn long_lz_flag_bits(prefer_long_lz_on_one: bool) -> &'static [bool] {
965 if prefer_long_lz_on_one {
966 &[true]
967 } else {
968 &[false, true]
969 }
970}
971
972fn write_planned_flag_bits(flags: &mut u8, start: usize, bits: &[bool]) {
973 for (offset, &bit) in bits.iter().enumerate() {
974 if bit {
975 *flags |= 1 << (7 - start - offset);
976 }
977 }
978}
979
980fn plan_huff_effect(nhfb: &mut u32, nlzb: &mut u32) {
981 *nhfb += 16;
982 if *nhfb > 0xff {
983 *nhfb = 0x90;
984 *nlzb >>= 1;
985 }
986}
987
988fn l_count_break_bit_cost(l_count: u32) -> usize {
989 usize::from(l_count == 2)
990}
991
992fn find_lz_token(
993 input: &[u8],
994 pos: usize,
995 buckets: &[Vec<usize>],
996 state: LzPlanState,
997 options: EncodeOptions,
998) -> Option<EncodedToken> {
999 find_lz_tokens(input, pos, buckets, state, options)
1000 .into_iter()
1001 .next()
1002}
1003
1004fn find_lz_tokens(
1005 input: &[u8],
1006 pos: usize,
1007 buckets: &[Vec<usize>],
1008 state: LzPlanState,
1009 options: EncodeOptions,
1010) -> Vec<EncodedToken> {
1011 let mut tokens = Vec::with_capacity(4);
1012 if let Some(repeat) = find_repeat_last_lz(input, pos, state.last_dist, state.last_length) {
1013 tokens.push(EncodedToken::RepeatLast(repeat));
1014 }
1015 if options.old_distance_tokens {
1016 if let Some(old_lz) = find_old_dist_lz(
1017 input,
1018 pos,
1019 state.old_dist,
1020 state.old_dist_ptr,
1021 state.max_dist3,
1022 ) {
1023 tokens.push(EncodedToken::OldDist(old_lz));
1024 }
1025 }
1026 if let Some(short_lz) = find_short_lz(input, pos) {
1027 tokens.push(EncodedToken::ShortLz(short_lz));
1028 }
1029 if let Some(long_lz) = find_long_lz_with_buckets(
1030 input,
1031 pos,
1032 options.max_long_match_distance,
1033 buckets,
1034 MAX_LONG_MATCH_CANDIDATES,
1035 )
1036 .filter(|long_lz| long_lz_length_code_for_distance(*long_lz, state.max_dist3).is_some())
1037 {
1038 tokens.push(EncodedToken::LongLz(long_lz));
1039 }
1040 tokens
1041}
1042
1043fn should_lazy_emit_literal(
1044 input: &[u8],
1045 pos: usize,
1046 buckets: &[Vec<usize>],
1047 current: EncodedToken,
1048 max_dist3: u32,
1049 options: EncodeOptions,
1050) -> bool {
1051 if !matches!(current, EncodedToken::ShortLz(_) | EncodedToken::LongLz(_))
1052 || pos + 1 >= input.len()
1053 {
1054 return false;
1055 }
1056
1057 let next = find_lz_token(
1058 input,
1059 pos + 1,
1060 buckets,
1061 LzPlanState {
1062 last_dist: u32::MAX,
1063 last_length: 0,
1064 old_dist: [u32::MAX; 4],
1065 old_dist_ptr: 0,
1066 max_dist3,
1067 nlzb: 0,
1068 nhfb: 0,
1069 l_count: 0,
1070 },
1071 options,
1072 );
1073 next.is_some_and(|next| {
1074 matches!(next, EncodedToken::ShortLz(_) | EncodedToken::LongLz(_))
1075 && next.length() >= current.length() + 2
1076 })
1077}
1078
1079fn find_short_lz(input: &[u8], pos: usize) -> Option<ShortLz> {
1080 if pos < 2 {
1081 return None;
1082 }
1083
1084 let max_distance = pos.min(256);
1085 let mut best = ShortLz {
1086 distance: 0,
1087 length: 0,
1088 };
1089 for distance in 1..=max_distance {
1090 let mut length = 0usize;
1091 while length < 10
1092 && pos + length < input.len()
1093 && input[pos + length] == input[pos + length - distance]
1094 {
1095 length += 1;
1096 }
1097 if length >= 3
1098 && (length > best.length as usize
1099 || (length == best.length as usize && distance < best.distance as usize))
1100 {
1101 best = ShortLz {
1102 distance: distance as u32,
1103 length: length as u32,
1104 };
1105 }
1106 }
1107
1108 (best.length >= 3).then_some(best)
1109}
1110
1111fn find_repeat_last_lz(
1112 input: &[u8],
1113 pos: usize,
1114 last_dist: u32,
1115 last_length: u32,
1116) -> Option<RepeatLastLz> {
1117 if last_dist == u32::MAX || last_dist == 0 || last_length == 0 {
1118 return None;
1119 }
1120 let distance = usize::try_from(last_dist).ok()?;
1121 let length = usize::try_from(last_length).ok()?;
1122 if distance > pos || pos.checked_add(length)? > input.len() {
1123 return None;
1124 }
1125 let matches = (0..length).all(|offset| input[pos + offset] == input[pos + offset - distance]);
1126 matches.then_some(RepeatLastLz {
1127 distance: last_dist,
1128 length: last_length,
1129 })
1130}
1131
1132fn find_old_dist_lz(
1133 input: &[u8],
1134 pos: usize,
1135 old_dist: [u32; 4],
1136 old_dist_ptr: usize,
1137 _max_dist3: u32,
1138) -> Option<OldDistLz> {
1139 let mut best = OldDistLz {
1140 distance: 0,
1141 length: 0,
1142 short_code: 0,
1143 };
1144 for short_code in 10..=13 {
1145 let distance = old_dist[(old_dist_ptr.wrapping_sub((short_code - 9) as usize)) & 3];
1146 if distance == u32::MAX || distance == 0 {
1147 continue;
1148 }
1149 let Ok(distance_usize) = usize::try_from(distance) else {
1150 continue;
1151 };
1152 if distance_usize > pos {
1153 continue;
1154 }
1155 let mut length = 0usize;
1156 while length < 258
1157 && pos + length < input.len()
1158 && input[pos + length] == input[pos + length - distance_usize]
1159 {
1160 length += 1;
1161 }
1162 if length >= 3
1163 && old_dist_lz_is_encodable(length as u32, distance, short_code)
1164 && length > best.length as usize
1165 {
1166 best = OldDistLz {
1167 distance,
1168 length: length as u32,
1169 short_code,
1170 };
1171 }
1172 }
1173
1174 (best.length >= 3).then_some(best)
1175}
1176
1177fn old_dist_lz_is_encodable(length: u32, distance: u32, short_code: u32) -> bool {
1178 old_dist_lz_length_code(length, distance, 0x2001, short_code).is_some()
1179 && old_dist_lz_length_code(length, distance, 0x7f00, short_code).is_some()
1180}
1181
1182fn old_dist_lz_length_code(
1183 length: u32,
1184 distance: u32,
1185 max_dist3: u32,
1186 short_code: u32,
1187) -> Option<u32> {
1188 let decoded_bonus = u32::from(distance > 256) + u32::from(distance >= max_dist3);
1189 let length_code = length.checked_sub(2 + decoded_bonus)?;
1190 if short_code == 10 && length_code == 0xff {
1191 return None;
1192 }
1193 Some(length_code)
1194}
1195
1196fn long_lz_length_code_for_distance(long_lz: LongLz, max_dist3: u32) -> Option<u32> {
1197 let decoded_bonus =
1198 u32::from(long_lz.distance >= max_dist3) + u32::from(long_lz.distance <= 256);
1199 long_lz.length.checked_sub(3 + decoded_bonus)
1200}
1201
1202pub fn find_long_lz(input: &[u8], pos: usize, max_match_distance: usize) -> Option<LongLz> {
1203 if pos < 257 {
1204 return None;
1205 }
1206
1207 let max_distance = pos.min(MAX_LONG_LZ_DISTANCE).min(max_match_distance);
1208 if max_distance < 257 {
1209 return None;
1210 }
1211 let mut best = LongLz {
1212 distance: 0,
1213 length: 0,
1214 };
1215 for distance in 257..=max_distance {
1216 let mut length = 0usize;
1217 while length < 258
1218 && pos + length < input.len()
1219 && input[pos + length] == input[pos + length - distance]
1220 {
1221 length += 1;
1222 }
1223 if length >= 3 && length > best.length as usize {
1224 best = LongLz {
1225 distance: distance as u32,
1226 length: length as u32,
1227 };
1228 }
1229 }
1230
1231 (best.length >= 3).then_some(best)
1232}
1233
1234fn find_long_lz_with_buckets(
1235 input: &[u8],
1236 pos: usize,
1237 max_match_distance: usize,
1238 buckets: &[Vec<usize>],
1239 max_candidates: usize,
1240) -> Option<LongLz> {
1241 if pos < 257 || pos + 2 >= input.len() || max_candidates == 0 {
1242 return None;
1243 }
1244
1245 let max_distance = pos.min(MAX_LONG_LZ_DISTANCE).min(max_match_distance);
1246 if max_distance < 257 {
1247 return None;
1248 }
1249 let max_length = (input.len() - pos).min(258);
1250 let mut best = LongLz {
1251 distance: 0,
1252 length: 0,
1253 };
1254 let mut checked = 0usize;
1255 for &candidate in buckets[match_hash(input, pos)].iter().rev() {
1256 if candidate >= pos {
1257 continue;
1258 }
1259 let distance = pos - candidate;
1260 if distance > max_distance {
1261 break;
1262 }
1263 if distance < 257 {
1264 continue;
1265 }
1266 checked += 1;
1267 let mut length = 0usize;
1268 while length < max_length && input[pos + length] == input[pos + length - distance] {
1269 length += 1;
1270 }
1271 if length >= 3
1272 && (length > best.length as usize
1273 || (length == best.length as usize && distance < best.distance as usize))
1274 {
1275 best = LongLz {
1276 distance: distance as u32,
1277 length: length as u32,
1278 };
1279 if length == max_length {
1280 break;
1281 }
1282 }
1283 if checked >= max_candidates {
1284 break;
1285 }
1286 }
1287
1288 (best.length >= 3).then_some(best)
1289}
1290
1291fn long_lz_buckets(input: &[u8]) -> Vec<Vec<usize>> {
1292 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1293 for pos in 0..input.len().saturating_sub(2) {
1294 buckets[match_hash(input, pos)].push(pos);
1295 }
1296 buckets
1297}
1298
1299fn match_hash(input: &[u8], pos: usize) -> usize {
1300 let value =
1301 ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
1302 value & (MATCH_HASH_BUCKETS - 1)
1303}
1304
1305fn emit_long_lz_length(bits: &mut BitWriter, avr_ln2: u32, length_code: u32) -> Result<()> {
1306 if avr_ln2 >= 122 {
1307 return emit_decode_num(bits, length_code, 3, DEC_L2, POS_L2);
1308 }
1309 if avr_ln2 >= 64 {
1310 return emit_decode_num(bits, length_code, 2, DEC_L1, POS_L1);
1311 }
1312 if length_code <= 7 {
1313 bits.write_bits(1, length_code as usize + 1);
1314 return Ok(());
1315 }
1316 if length_code < 0x100 {
1317 bits.write_bits(length_code, 16);
1318 return Ok(());
1319 }
1320 Err(Error::InvalidData(
1321 "RAR 1.3 LongLZ encoder length is not encodable",
1322 ))
1323}
1324
1325fn emit_decode_num(
1326 bits: &mut BitWriter,
1327 target: u32,
1328 start_pos: u32,
1329 dec_tab: &[u16],
1330 pos_tab: &[u16],
1331) -> Result<()> {
1332 if let Some((code, len)) = encode_decode_num_prefix(target, start_pos, dec_tab, pos_tab) {
1333 bits.write_bits(code, len);
1334 return Ok(());
1335 }
1336 Err(Error::InvalidData(
1337 "RAR 1.3 DecodeNum value is not encodable",
1338 ))
1339}
1340
1341fn decode_num_bit_cost(
1342 target: u32,
1343 start_pos: u32,
1344 dec_tab: &[u16],
1345 pos_tab: &[u16],
1346) -> Option<usize> {
1347 encode_decode_num_prefix(target, start_pos, dec_tab, pos_tab).map(|(_, len)| len)
1348}
1349
1350fn encode_decode_num_prefix(
1351 target: u32,
1352 start_pos: u32,
1353 dec_tab: &[u16],
1354 pos_tab: &[u16],
1355) -> Option<(u32, usize)> {
1356 let end = 16.min(pos_tab.len().saturating_sub(1));
1357 for (len, &base) in pos_tab
1358 .iter()
1359 .enumerate()
1360 .take(end + 1)
1361 .skip(start_pos as usize)
1362 {
1363 let dec_index = len.checked_sub(start_pos as usize)?;
1364 let upper = u32::from(*dec_tab.get(dec_index)?);
1365 let previous = if dec_index == 0 {
1366 0
1367 } else {
1368 u32::from(dec_tab[dec_index - 1])
1369 };
1370 let max_num = upper.checked_sub(1)? & !0xf;
1371 if max_num < previous {
1372 continue;
1373 }
1374 let base = u32::from(base);
1375 let max_target = ((max_num - previous) >> (16 - len)) + base;
1376 if target >= base && target <= max_target {
1377 let num = previous + ((target - base) << (16 - len));
1378 return Some((num >> (16 - len), len));
1379 }
1380 }
1381 None
1382}
1383
1384#[cfg(test)]
1385fn decode_num_prefix_is_stable(
1386 code: u32,
1387 len: usize,
1388 target: u32,
1389 start_pos: u32,
1390 dec_tab: &[u16],
1391 pos_tab: &[u16],
1392) -> bool {
1393 let relevant_tail_bits = 16usize.saturating_sub(len + 4);
1394 for tail in 0..(1u32 << relevant_tail_bits) {
1395 let bit_field = (code << (16 - len)) | (tail << 4);
1396 let (decoded, consumed) = simulate_decode_num(bit_field, start_pos, dec_tab, pos_tab);
1397 if decoded != target || consumed != len {
1398 return false;
1399 }
1400 }
1401 true
1402}
1403
1404#[cfg(test)]
1405fn simulate_decode_num(
1406 bit_field: u32,
1407 mut start_pos: u32,
1408 dec_tab: &[u16],
1409 pos_tab: &[u16],
1410) -> (u32, usize) {
1411 let num = bit_field & 0xfff0;
1412 let mut i = 0usize;
1413 while dec_tab[i] as u32 <= num {
1414 start_pos += 1;
1415 i += 1;
1416 }
1417 (
1418 ((num - if i > 0 { dec_tab[i - 1] as u32 } else { 0 }) >> (16 - start_pos))
1419 + pos_tab[start_pos as usize] as u32,
1420 start_pos as usize,
1421 )
1422}
1423
1424#[derive(Clone)]
1425pub struct Unpack15 {
1426 bits: BitReader,
1427 target: usize,
1428 output_written: usize,
1429 window: [u8; 0x10000],
1430 unp_ptr: usize,
1431 prev_ptr: usize,
1432 first_win_done: bool,
1433 ch_set: [u16; 256],
1436 ch_set_a: [u16; 256],
1437 ch_set_b: [u16; 256],
1438 ch_set_c: [u16; 256],
1439 n_to_pl: [u8; 256],
1440 n_to_pl_b: [u8; 256],
1441 n_to_pl_c: [u8; 256],
1442 avr_plc: u32,
1443 avr_plc_b: u32,
1444 avr_ln1: u32,
1445 avr_ln2: u32,
1446 avr_ln3: u32,
1447 max_dist3: u32,
1448 nhfb: u32,
1449 nlzb: u32,
1450 num_huf: u32,
1451 buf60: u32,
1452 st_mode: bool,
1453 l_count: u32,
1454 flag_buf: u32,
1455 flags_cnt: i32,
1456 old_dist: [u32; 4],
1457 old_dist_ptr: usize,
1458 last_dist: u32,
1459 last_length: u32,
1460}
1461
1462impl Unpack15 {
1463 pub fn new() -> Self {
1464 Self {
1465 bits: BitReader::new(&[]),
1466 target: 0,
1467 output_written: 0,
1468 window: [0; 0x10000],
1469 unp_ptr: 0,
1470 prev_ptr: 0,
1471 first_win_done: false,
1472 ch_set: [0; 256],
1473 ch_set_a: [0; 256],
1474 ch_set_b: [0; 256],
1475 ch_set_c: [0; 256],
1476 n_to_pl: [0; 256],
1477 n_to_pl_b: [0; 256],
1478 n_to_pl_c: [0; 256],
1479 avr_plc: 0x3500,
1480 avr_plc_b: 0,
1481 avr_ln1: 0,
1482 avr_ln2: 0,
1483 avr_ln3: 0,
1484 max_dist3: 0x2001,
1485 nhfb: 0x80,
1486 nlzb: 0x80,
1487 num_huf: 0,
1488 buf60: 0,
1489 st_mode: false,
1490 l_count: 0,
1491 flag_buf: 0,
1492 flags_cnt: 0,
1493 old_dist: [u32::MAX; 4],
1494 old_dist_ptr: 0,
1495 last_dist: u32::MAX,
1496 last_length: 0,
1497 }
1498 }
1499
1500 pub fn decode_member(&mut self, input: &[u8], target: usize, solid: bool) -> Result<Vec<u8>> {
1501 let mut output = Vec::with_capacity(target);
1502 self.decode_member_to(input, target, solid, &mut output)?;
1503 Ok(output)
1504 }
1505
1506 pub fn decode_member_to(
1507 &mut self,
1508 input: &[u8],
1509 target: usize,
1510 solid: bool,
1511 out: &mut impl Write,
1512 ) -> Result<()> {
1513 self.init_member(target, solid);
1514 self.bits = BitReader::new_final(input);
1515 self.decode_loop(out).map_err(|error| match error {
1516 Error::NeedMoreInput => Error::InvalidData("RAR 1.3 bitstream is truncated"),
1517 error => error,
1518 })
1519 }
1520
1521 pub fn decode_member_from_reader(
1522 &mut self,
1523 input: &mut impl Read,
1524 target: usize,
1525 solid: bool,
1526 out: &mut impl Write,
1527 ) -> Result<()> {
1528 const INPUT_CHUNK: usize = 64 * 1024;
1529 const OUTPUT_CHUNK: usize = 64 * 1024;
1530
1531 self.init_member(target, solid);
1532 self.bits = BitReader::new(&[]);
1533 let mut input_done = false;
1534 let mut buffer = [0u8; INPUT_CHUNK];
1535
1536 while self.output_written < self.target {
1537 let chunk_target = self
1538 .output_written
1539 .saturating_add(OUTPUT_CHUNK)
1540 .min(self.target);
1541 loop {
1542 let checkpoint = self.clone();
1543 let mut chunk = Vec::with_capacity(chunk_target - self.output_written);
1544 match self.decode_loop_until(chunk_target, &mut chunk) {
1545 Ok(()) => {
1546 out.write_all(&chunk)
1547 .map_err(|_| Error::InvalidData("RAR 1.3 output write failed"))?;
1548 break;
1549 }
1550 Err(Error::NeedMoreInput) if !input_done => {
1551 *self = checkpoint;
1552 let read = input
1553 .read(&mut buffer)
1554 .map_err(|_| Error::InvalidData("RAR 1.3 input read failed"))?;
1555 if read == 0 {
1556 input_done = true;
1557 self.bits.finish();
1558 } else {
1559 self.bits.append(&buffer[..read]);
1560 }
1561 }
1562 Err(Error::NeedMoreInput) => {
1563 return Err(Error::InvalidData("RAR 1.3 bitstream is truncated"));
1564 }
1565 Err(error) => return Err(error),
1566 }
1567 }
1568 }
1569 Ok(())
1570 }
1571
1572 fn init_member(&mut self, target: usize, solid: bool) {
1573 self.target = target;
1574 self.output_written = 0;
1575 self.flags_cnt = -2;
1576 self.flag_buf = 0;
1577 self.st_mode = false;
1578 self.l_count = 0;
1579
1580 if !solid {
1581 self.reset_non_solid();
1582 }
1583 }
1584
1585 fn decode_loop(&mut self, out: &mut impl Write) -> Result<()> {
1586 if self.target == 0 {
1587 return Ok(());
1588 }
1589
1590 self.decode_loop_until(self.target, out)
1591 }
1592
1593 fn decode_loop_until(&mut self, target: usize, out: &mut impl Write) -> Result<()> {
1594 while self.output_written < target {
1595 self.decode_step(out)?;
1596 }
1597
1598 Ok(())
1599 }
1600
1601 fn decode_step(&mut self, out: &mut impl Write) -> Result<()> {
1602 if self.flags_cnt == -2 {
1603 self.get_flags_buf()?;
1604 self.flags_cnt = 8;
1605 }
1606
1607 self.unp_ptr &= 0xffff;
1608 self.first_win_done |= self.prev_ptr > self.unp_ptr;
1609 self.prev_ptr = self.unp_ptr;
1610
1611 if self.st_mode {
1612 return self.huff_decode(out);
1613 }
1614
1615 self.flags_cnt -= 1;
1616 if self.flags_cnt < 0 {
1617 self.get_flags_buf()?;
1618 self.flags_cnt = 7;
1619 }
1620
1621 if self.flag_buf & 0x80 != 0 {
1622 self.flag_buf = (self.flag_buf << 1) & 0xff;
1623 if self.nlzb > self.nhfb {
1624 self.long_lz(out)
1625 } else {
1626 self.huff_decode(out)
1627 }
1628 } else {
1629 self.flag_buf = (self.flag_buf << 1) & 0xff;
1630 self.flags_cnt -= 1;
1631 if self.flags_cnt < 0 {
1632 self.get_flags_buf()?;
1633 self.flags_cnt = 7;
1634 }
1635 if self.flag_buf & 0x80 != 0 {
1636 self.flag_buf = (self.flag_buf << 1) & 0xff;
1637 if self.nlzb > self.nhfb {
1638 self.huff_decode(out)
1639 } else {
1640 self.long_lz(out)
1641 }
1642 } else {
1643 self.flag_buf = (self.flag_buf << 1) & 0xff;
1644 self.short_lz(out)
1645 }
1646 }
1647 }
1648
1649 fn reset_non_solid(&mut self) {
1650 self.window = [0; 0x10000];
1651 self.unp_ptr = 0;
1652 self.prev_ptr = 0;
1653 self.first_win_done = false;
1654 self.avr_plc_b = 0;
1655 self.avr_ln1 = 0;
1656 self.avr_ln2 = 0;
1657 self.avr_ln3 = 0;
1658 self.num_huf = 0;
1659 self.buf60 = 0;
1660 self.avr_plc = 0x3500;
1661 self.max_dist3 = 0x2001;
1662 self.nhfb = 0x80;
1663 self.nlzb = 0x80;
1664 self.old_dist = [u32::MAX; 4];
1665 self.old_dist_ptr = 0;
1666 self.last_dist = u32::MAX;
1667 self.last_length = 0;
1668 self.init_huff();
1669 }
1670
1671 fn short_lz(&mut self, out: &mut impl Write) -> Result<()> {
1672 self.num_huf = 0;
1673 let mut bit_field = self.bits.get_bits()?;
1674 if self.l_count == 2 {
1675 self.bits.add_bits(1);
1676 if bit_field >= 0x8000 {
1677 self.copy_string(self.last_dist, self.last_length, out)?;
1678 return Ok(());
1679 }
1680 bit_field = (bit_field << 1) & 0xffff;
1681 self.l_count = 0;
1682 }
1683
1684 let bit_byte = (bit_field >> 8) as u8;
1685 let mut length = 0usize;
1686 if self.avr_ln1 < 37 {
1687 while length < SHORT_XOR1.len() {
1688 let short_len = self.short_len1(length);
1689 let mask = (!(0xffu16 >> short_len)) as u8;
1690 if ((bit_byte ^ SHORT_XOR1[length]) & mask) == 0 {
1691 break;
1692 }
1693 length += 1;
1694 }
1695 self.bits.add_bits(self.short_len1(length) as usize);
1696 } else {
1697 while length < SHORT_XOR2.len() {
1698 let short_len = self.short_len2(length);
1699 let mask = (!(0xffu16 >> short_len)) as u8;
1700 if ((bit_byte ^ SHORT_XOR2[length]) & mask) == 0 {
1701 break;
1702 }
1703 length += 1;
1704 }
1705 self.bits.add_bits(self.short_len2(length) as usize);
1706 }
1707
1708 let mut length = length as u32;
1709 if length >= 9 {
1710 if length == 9 {
1711 self.l_count += 1;
1712 self.copy_string(self.last_dist, self.last_length, out)?;
1713 return Ok(());
1714 }
1715 if length == 14 {
1716 self.l_count = 0;
1717 length = self.decode_num(self.bits.get_bits()?, 3, DEC_L2, POS_L2) + 5;
1718 let distance = (self.bits.get_bits()? >> 1) | 0x8000;
1719 self.bits.add_bits(15);
1720 self.last_length = length;
1721 self.last_dist = distance;
1722 self.copy_string(distance, length, out)?;
1723 return Ok(());
1724 }
1725
1726 self.l_count = 0;
1727 let save_length = length;
1728 let distance =
1729 self.old_dist[(self.old_dist_ptr.wrapping_sub((length - 9) as usize)) & 3];
1730 length = self.decode_num(self.bits.get_bits()?, 2, DEC_L1, POS_L1) + 2;
1731 if length == 0x101 && save_length == 10 {
1732 self.buf60 ^= 1;
1733 return Ok(());
1734 }
1735 if distance > 256 {
1736 length += 1;
1737 }
1738 if distance >= self.max_dist3 {
1739 length += 1;
1740 }
1741
1742 self.remember_match(distance, length);
1743 self.copy_string(distance, length, out)?;
1744 return Ok(());
1745 }
1746
1747 self.l_count = 0;
1748 self.avr_ln1 += length;
1749 self.avr_ln1 -= self.avr_ln1 >> 4;
1750
1751 let distance_place =
1752 (self.decode_num(self.bits.get_bits()?, 5, DEC_HF2, POS_HF2) & 0xff) as usize;
1753 let mut distance = self.ch_set_a[distance_place] as u32;
1754 if distance_place > 0 {
1755 let last_distance = self.ch_set_a[distance_place - 1];
1756 self.ch_set_a[distance_place] = last_distance;
1757 self.ch_set_a[distance_place - 1] = distance as u16;
1758 }
1759 length += 2;
1760 distance += 1;
1761 self.remember_match(distance, length);
1762 self.copy_string(distance, length, out)
1763 }
1764
1765 fn long_lz(&mut self, out: &mut impl Write) -> Result<()> {
1766 self.num_huf = 0;
1767 self.nlzb += 16;
1768 if self.nlzb > 0xff {
1769 self.nlzb = 0x90;
1770 self.nhfb >>= 1;
1771 }
1772 let old_avr2 = self.avr_ln2;
1773
1774 let bit_field = self.bits.get_bits()?;
1775 let mut length = if self.avr_ln2 >= 122 {
1776 self.decode_num(bit_field, 3, DEC_L2, POS_L2)
1777 } else if self.avr_ln2 >= 64 {
1778 self.decode_num(bit_field, 2, DEC_L1, POS_L1)
1779 } else if bit_field < 0x100 {
1780 self.bits.add_bits(16);
1781 bit_field
1782 } else {
1783 let mut length = 0u32;
1784 while ((bit_field << length) & 0x8000) == 0 {
1785 length += 1;
1786 }
1787 self.bits.add_bits((length + 1) as usize);
1788 length
1789 };
1790
1791 self.avr_ln2 += length;
1792 self.avr_ln2 -= self.avr_ln2 >> 5;
1793
1794 let bit_field = self.bits.get_bits()?;
1795 let distance_place = if self.avr_plc_b > 0x28ff {
1796 self.decode_num(bit_field, 5, DEC_HF2, POS_HF2)
1797 } else if self.avr_plc_b > 0x06ff {
1798 self.decode_num(bit_field, 5, DEC_HF1, POS_HF1)
1799 } else {
1800 self.decode_num(bit_field, 4, DEC_HF0, POS_HF0)
1801 };
1802
1803 self.avr_plc_b += distance_place;
1804 self.avr_plc_b -= self.avr_plc_b >> 8;
1805
1806 let idx = (distance_place & 0xff) as usize;
1807 let mut distance;
1808 let mut new_distance_place;
1809 loop {
1810 distance = self.ch_set_b[idx] as u32;
1811 new_distance_place = self.n_to_pl_b[(distance & 0xff) as usize] as usize;
1812 self.n_to_pl_b[(distance & 0xff) as usize] =
1813 self.n_to_pl_b[(distance & 0xff) as usize].wrapping_add(1);
1814 distance += 1;
1815 if distance & 0xff == 0 {
1816 corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
1817 } else {
1818 break;
1819 }
1820 }
1821
1822 self.ch_set_b[idx] = self.ch_set_b[new_distance_place];
1823 self.ch_set_b[new_distance_place] = distance as u16;
1824
1825 distance = ((distance & 0xff00) | (self.bits.get_bits()? >> 8)) >> 1;
1826 self.bits.add_bits(7);
1827
1828 let old_avr3 = self.avr_ln3;
1829 if length != 1 && length != 4 {
1830 if length == 0 && distance <= self.max_dist3 {
1831 self.avr_ln3 += 1;
1832 self.avr_ln3 -= self.avr_ln3 >> 8;
1833 } else if self.avr_ln3 > 0 {
1834 self.avr_ln3 -= 1;
1835 }
1836 }
1837 length += 3;
1838 if distance >= self.max_dist3 {
1839 length += 1;
1840 }
1841 if distance <= 256 {
1842 length += 8;
1843 }
1844 if old_avr3 > 0xb0 || (self.avr_plc >= 0x2a00 && old_avr2 < 0x40) {
1845 self.max_dist3 = 0x7f00;
1846 } else {
1847 self.max_dist3 = 0x2001;
1848 }
1849
1850 self.remember_match(distance, length);
1851 self.copy_string(distance, length, out)
1852 }
1853
1854 fn huff_decode(&mut self, out: &mut impl Write) -> Result<()> {
1855 let bit_field = self.bits.get_bits()?;
1856
1857 let mut byte_place = if self.avr_plc > 0x75ff {
1858 self.decode_num(bit_field, 8, DEC_HF4, POS_HF4)
1859 } else if self.avr_plc > 0x5dff {
1860 self.decode_num(bit_field, 6, DEC_HF3, POS_HF3)
1861 } else if self.avr_plc > 0x35ff {
1862 self.decode_num(bit_field, 5, DEC_HF2, POS_HF2)
1863 } else if self.avr_plc > 0x0dff {
1864 self.decode_num(bit_field, 5, DEC_HF1, POS_HF1)
1865 } else {
1866 self.decode_num(bit_field, 4, DEC_HF0, POS_HF0)
1867 } & 0xff;
1868
1869 if self.st_mode {
1870 if byte_place == 0 && bit_field > 0x0fff {
1871 byte_place = 0x100;
1872 }
1873 if byte_place == 0 {
1874 let bit_field = self.bits.get_bits()?;
1875 self.bits.add_bits(1);
1876 if bit_field & 0x8000 != 0 {
1877 self.num_huf = 0;
1878 self.st_mode = false;
1879 return Ok(());
1880 }
1881
1882 let length = if bit_field & 0x4000 != 0 { 4 } else { 3 };
1883 self.bits.add_bits(1);
1884 let mut distance = self.decode_num(self.bits.get_bits()?, 5, DEC_HF2, POS_HF2);
1885 distance = (distance << 5) | (self.bits.get_bits()? >> 11);
1886 self.bits.add_bits(5);
1887 self.copy_string(distance, length, out)?;
1888 return Ok(());
1889 }
1890 byte_place -= 1;
1891 } else {
1892 if self.num_huf >= 16 && self.flags_cnt == 0 {
1893 self.st_mode = true;
1894 }
1895 self.num_huf += 1;
1896 }
1897
1898 self.avr_plc += byte_place;
1899 self.avr_plc -= self.avr_plc >> 8;
1900 self.nhfb += 16;
1901 if self.nhfb > 0xff {
1902 self.nhfb = 0x90;
1903 self.nlzb >>= 1;
1904 }
1905
1906 let byte = (self.ch_set[byte_place as usize] >> 8) as u8;
1907 self.put_byte(byte, out)?;
1908
1909 let idx = byte_place as usize;
1910 let mut cur_byte;
1911 let mut new_byte_place;
1912 loop {
1913 cur_byte = self.ch_set[idx] as u32;
1914 new_byte_place = self.n_to_pl[(cur_byte & 0xff) as usize] as usize;
1915 self.n_to_pl[(cur_byte & 0xff) as usize] =
1916 self.n_to_pl[(cur_byte & 0xff) as usize].wrapping_add(1);
1917 cur_byte += 1;
1918 if cur_byte & 0xff > 0xa1 {
1919 corr_huff(&mut self.ch_set, &mut self.n_to_pl);
1920 } else {
1921 break;
1922 }
1923 }
1924
1925 self.ch_set[idx] = self.ch_set[new_byte_place];
1926 self.ch_set[new_byte_place] = cur_byte as u16;
1927 Ok(())
1928 }
1929
1930 fn get_flags_buf(&mut self) -> Result<()> {
1931 let flags_place = self.decode_num(self.bits.get_bits()?, 5, DEC_HF2, POS_HF2) as usize;
1932 if flags_place >= self.ch_set_c.len() {
1933 return Ok(());
1934 }
1935
1936 let mut flags;
1937 let mut new_flags_place;
1938 loop {
1939 flags = self.ch_set_c[flags_place] as u32;
1940 new_flags_place = self.n_to_pl_c[(flags & 0xff) as usize] as usize;
1941 self.n_to_pl_c[(flags & 0xff) as usize] =
1942 self.n_to_pl_c[(flags & 0xff) as usize].wrapping_add(1);
1943 self.flag_buf = flags >> 8;
1944 flags += 1;
1945 if flags & 0xff == 0 {
1946 corr_huff(&mut self.ch_set_c, &mut self.n_to_pl_c);
1947 } else {
1948 break;
1949 }
1950 }
1951
1952 self.ch_set_c[flags_place] = self.ch_set_c[new_flags_place];
1953 self.ch_set_c[new_flags_place] = flags as u16;
1954 Ok(())
1955 }
1956
1957 fn decode_num(
1958 &mut self,
1959 num: u32,
1960 mut start_pos: u32,
1961 dec_tab: &[u16],
1962 pos_tab: &[u16],
1963 ) -> u32 {
1964 let num = num & 0xfff0;
1965 let mut i = 0usize;
1966 while dec_tab[i] as u32 <= num {
1967 start_pos += 1;
1968 i += 1;
1969 }
1970 self.bits.add_bits(start_pos as usize);
1971 ((num - if i > 0 { dec_tab[i - 1] as u32 } else { 0 }) >> (16 - start_pos))
1972 + pos_tab[start_pos as usize] as u32
1973 }
1974
1975 fn copy_string(&mut self, distance: u32, length: u32, out: &mut impl Write) -> Result<()> {
1976 if self.output_written + length as usize > self.target {
1977 return Err(Error::InvalidData("RAR 1.3 match exceeds output size"));
1978 }
1979
1980 if (!self.first_win_done && distance as usize > self.unp_ptr)
1981 || distance as usize > 0x10000
1982 || distance == 0
1983 {
1984 for _ in 0..length {
1985 self.put_byte(0, out)?;
1986 }
1987 } else {
1988 for _ in 0..length {
1989 let byte = self.window[(self.unp_ptr.wrapping_sub(distance as usize)) & 0xffff];
1990 self.put_byte(byte, out)?;
1991 }
1992 }
1993 Ok(())
1994 }
1995
1996 fn put_byte(&mut self, byte: u8, out: &mut impl Write) -> Result<()> {
1997 if self.output_written >= self.target {
1998 return Err(Error::InvalidData("RAR 1.3 literal exceeds output size"));
1999 }
2000 self.window[self.unp_ptr] = byte;
2001 self.unp_ptr = (self.unp_ptr + 1) & 0xffff;
2002 out.write_all(&[byte])
2003 .map_err(|_| Error::InvalidData("RAR 1.3 output write failed"))?;
2004 self.output_written += 1;
2005 Ok(())
2006 }
2007
2008 fn remember_match(&mut self, distance: u32, length: u32) {
2009 self.old_dist[self.old_dist_ptr] = distance;
2010 self.old_dist_ptr = (self.old_dist_ptr + 1) & 3;
2011 self.last_length = length;
2012 self.last_dist = distance;
2013 }
2014
2015 fn short_len1(&self, pos: usize) -> u32 {
2016 if pos == 1 {
2017 self.buf60 + 3
2018 } else {
2019 SHORT_LEN1[pos] as u32
2020 }
2021 }
2022
2023 fn short_len2(&self, pos: usize) -> u32 {
2024 if pos == 3 {
2025 self.buf60 + 3
2026 } else {
2027 SHORT_LEN2[pos] as u32
2028 }
2029 }
2030
2031 fn init_huff(&mut self) {
2032 for i in 0..256 {
2033 self.ch_set[i] = (i as u16) << 8;
2034 self.ch_set_b[i] = (i as u16) << 8;
2035 self.ch_set_a[i] = i as u16;
2036 self.ch_set_c[i] = (0u8.wrapping_sub(i as u8) as u16) << 8;
2037 }
2038 self.n_to_pl = [0; 256];
2039 self.n_to_pl_b = [0; 256];
2040 self.n_to_pl_c = [0; 256];
2041 corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
2042 }
2043}
2044
2045impl Default for Unpack15 {
2046 fn default() -> Self {
2047 Self::new()
2048 }
2049}
2050
2051fn corr_huff(char_set: &mut [u16; 256], num_to_place: &mut [u8; 256]) {
2052 let mut pos = 0usize;
2053 for rank in (0..=7).rev() {
2054 for _ in 0..32 {
2055 char_set[pos] = (char_set[pos] & !0xff) | rank;
2056 pos += 1;
2057 }
2058 }
2059 *num_to_place = [0; 256];
2060 for rank in (0..=6).rev() {
2061 num_to_place[rank] = ((7 - rank) * 32) as u8;
2062 }
2063}
2064
2065#[cfg(test)]
2066mod tests {
2067 use super::{
2068 decode_num_bit_cost, decode_num_prefix_is_stable, find_long_lz, find_lz_token,
2069 find_old_dist_lz, long_lz_buckets, should_lazy_emit_literal, unpack15_decode,
2070 unpack15_encode, EncodeOptions, EncodedToken, LongLz, LzPlanState, OldDistLz, ShortLz,
2071 Unpack15, Unpack15Encoder, DEC_HF0, DEC_HF1, DEC_HF2, DEC_HF3, DEC_HF4, DEC_L1, DEC_L2,
2072 POS_HF0, POS_HF1, POS_HF2, POS_HF3, POS_HF4, POS_L1, POS_L2,
2073 };
2074
2075 fn brute_decode_num_bit_cost(
2076 target: u32,
2077 start_pos: u32,
2078 dec_tab: &[u16],
2079 pos_tab: &[u16],
2080 ) -> Option<usize> {
2081 for len in start_pos as usize..=16 {
2082 for code in 0..(1u32 << len) {
2083 if decode_num_prefix_is_stable(code, len, target, start_pos, dec_tab, pos_tab) {
2084 return Some(len);
2085 }
2086 }
2087 }
2088 None
2089 }
2090
2091 #[test]
2092 fn decode_member_from_reader_accepts_incremental_input() {
2093 struct TinyReader<'a> {
2094 input: &'a [u8],
2095 }
2096
2097 impl std::io::Read for TinyReader<'_> {
2098 fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
2099 if self.input.is_empty() {
2100 return Ok(0);
2101 }
2102 let len = self.input.len().min(out.len()).min(2);
2103 out[..len].copy_from_slice(&self.input[..len]);
2104 self.input = &self.input[len..];
2105 Ok(len)
2106 }
2107 }
2108
2109 let expected = b"RAR 1.4 incremental input fixture\n".repeat(32);
2110 let packed = unpack15_encode(&expected).unwrap();
2111 assert_eq!(unpack15_decode(&packed, expected.len()).unwrap(), expected);
2112
2113 let mut reader = TinyReader { input: &packed };
2114 let mut decoder = Unpack15::new();
2115 let mut output = Vec::new();
2116 decoder
2117 .decode_member_from_reader(&mut reader, expected.len(), false, &mut output)
2118 .unwrap();
2119
2120 assert_eq!(output, expected);
2121 }
2122
2123 #[test]
2124 fn decode_num_bit_cost_matches_prefix_search_at_table_boundaries() {
2125 let tables = [
2126 (4, DEC_HF0, POS_HF0),
2127 (5, DEC_HF1, POS_HF1),
2128 (5, DEC_HF2, POS_HF2),
2129 (6, DEC_HF3, POS_HF3),
2130 (8, DEC_HF4, POS_HF4),
2131 (2, DEC_L1, POS_L1),
2132 (3, DEC_L2, POS_L2),
2133 ];
2134 let targets = [0, 1, 2, 3, 7, 8, 16, 24, 32, 33, 53, 117, 233, 255, 256];
2135
2136 for (start_pos, dec_tab, pos_tab) in tables {
2137 for target in targets {
2138 assert_eq!(
2139 decode_num_bit_cost(target, start_pos, dec_tab, pos_tab),
2140 brute_decode_num_bit_cost(target, start_pos, dec_tab, pos_tab),
2141 "target {target}, start_pos {start_pos}"
2142 );
2143 }
2144 }
2145 }
2146
2147 #[test]
2148 fn encoder_emits_rar15_very_long_lz_matches() {
2149 let mut input: Vec<_> = (0u8..=255).cycle().take(300).collect();
2150 input.extend_from_within(..258);
2151
2152 assert_eq!(
2153 find_long_lz(&input, 300, 0x8000),
2154 Some(LongLz {
2155 distance: 300,
2156 length: 258
2157 })
2158 );
2159 let packed = unpack15_encode(&input).unwrap();
2160
2161 assert!(
2162 packed.len() < 330,
2163 "very-long LongLZ should encode a 258-byte repeat compactly, got {} bytes",
2164 packed.len()
2165 );
2166 assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2167 }
2168
2169 #[test]
2170 fn encoder_adjusts_rar15_long_lz_length_for_far_distance_bonus() {
2171 let mut input: Vec<_> = (0..9000).map(|index| (index * 73 + 19) as u8).collect();
2172 input.extend_from_within(..10);
2173
2174 let packed = unpack15_encode(&input).unwrap();
2175
2176 assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2177 }
2178
2179 #[test]
2180 fn encoder_reuses_rar15_repeat_last_token() {
2181 let input = b"abcdefghijklmnop".repeat(64);
2182 let packed = unpack15_encode(&input).unwrap();
2183
2184 assert!(
2185 packed.len() < 100,
2186 "repeat-last tokens should keep a simple repeated pattern compact, got {} bytes",
2187 packed.len()
2188 );
2189 assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2190 }
2191
2192 #[test]
2193 fn old_distance_finder_maps_ring_entries_to_short_lz_codes() {
2194 let mut input: Vec<_> = (0..80).map(|index| (index * 37 + 11) as u8).collect();
2195 let pos = input.len();
2196 input.extend_from_within(pos - 33..pos - 13);
2197
2198 assert_eq!(
2199 find_old_dist_lz(&input, pos, [11, 22, 33, 44], 0, 0x2001),
2200 Some(OldDistLz {
2201 distance: 33,
2202 length: 20,
2203 short_code: 11,
2204 })
2205 );
2206 }
2207
2208 #[test]
2209 fn old_distance_finder_rejects_buf60_toggle_encoding() {
2210 let mut input = b"abcd".repeat(128);
2211 let pos = input.len();
2212 input.extend((0..257).map(|index| b"abcd"[index % 4]));
2213
2214 assert_eq!(
2215 find_old_dist_lz(&input, pos, [u32::MAX, 4, u32::MAX, u32::MAX], 2, 0x2001),
2216 None,
2217 "short-code 10 with decoded base length 0x101 is the Buf60 toggle, not a match"
2218 );
2219 }
2220
2221 #[test]
2222 fn planner_emits_safe_old_distance_token() {
2223 let mut input: Vec<_> = (0..80).map(|index| (index * 37 + 11) as u8).collect();
2224 let pos = input.len();
2225 input.extend_from_within(pos - 33..pos - 13);
2226
2227 let encoder = Unpack15Encoder::new();
2228 let buckets = long_lz_buckets(&input);
2229 let token = encoder
2230 .choose_lz_token(
2231 &input,
2232 pos,
2233 &buckets,
2234 LzPlanState {
2235 last_dist: u32::MAX,
2236 last_length: 0,
2237 old_dist: [11, 22, 33, 44],
2238 old_dist_ptr: 0,
2239 max_dist3: 0x2001,
2240 nlzb: encoder.nlzb,
2241 nhfb: encoder.nhfb,
2242 l_count: encoder.l_count,
2243 },
2244 0,
2245 )
2246 .expect("old-distance candidate should be selected");
2247
2248 assert_eq!(
2249 token,
2250 EncodedToken::OldDist(OldDistLz {
2251 distance: 33,
2252 length: 20,
2253 short_code: 11,
2254 })
2255 );
2256 }
2257
2258 #[test]
2259 fn encoder_exits_stmode_when_literal_runs_trigger_decoder_mode() {
2260 let input: Vec<_> = (0..96).map(|index| (index * 73 + 19) as u8).collect();
2261 let packed = unpack15_encode(&input).unwrap();
2262
2263 assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2264 }
2265
2266 #[test]
2267 fn encoder_emits_stmode_literals_for_long_literal_runs() {
2268 let input: Vec<_> = (0..128).map(|index| (index * 73 + 19) as u8).collect();
2269 let mut encoder = Unpack15Encoder::new();
2270 let packed = encoder.encode_member(&input).unwrap();
2271
2272 assert!(
2273 encoder.stmode_literal_count > 0,
2274 "long literal runs should use stmode literals before exiting stmode"
2275 );
2276 assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2277 }
2278
2279 #[test]
2280 fn encoder_options_can_disable_stmode_literal_runs() {
2281 let input: Vec<_> = (0..128).map(|index| (index * 73 + 19) as u8).collect();
2282 let mut encoder =
2283 Unpack15Encoder::with_options(EncodeOptions::new().with_stmode_literal_runs(false));
2284 let packed = encoder.encode_member(&input).unwrap();
2285
2286 assert_eq!(encoder.stmode_literal_count, 0);
2287 assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2288 }
2289
2290 #[test]
2291 fn encoder_options_bound_long_lz_search_distance() {
2292 let mut input: Vec<_> = (0u8..=255).cycle().take(300).collect();
2293 input.extend_from_within(..64);
2294
2295 assert_eq!(find_long_lz(&input, 300, 256), None);
2296 assert_eq!(
2297 find_long_lz(&input, 300, 0x8000),
2298 Some(LongLz {
2299 distance: 300,
2300 length: 64
2301 })
2302 );
2303 }
2304
2305 #[test]
2306 fn long_lz_search_rejects_unencodable_32k_distance() {
2307 let mut input = Vec::with_capacity(0x8000 + 64);
2308 let mut state = 0x1234_5678u32;
2309 for _ in 0..0x8000 + 64 {
2310 state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
2311 input.push((state >> 24) as u8);
2312 }
2313 let repeated = input[..64].to_vec();
2314 input[0x8000..0x8000 + 64].copy_from_slice(&repeated);
2315
2316 let token = find_long_lz(&input, 0x8000, 0x8000);
2317
2318 assert_ne!(token.map(|token| token.distance), Some(0x8000));
2319 }
2320
2321 #[test]
2322 fn lazy_match_prefers_longer_next_position_match() {
2323 let input = b"abcXbcQRSTabcQRSTUV";
2324 let buckets = long_lz_buckets(input);
2325 let token = find_lz_token(
2326 input,
2327 10,
2328 &buckets,
2329 LzPlanState {
2330 last_dist: u32::MAX,
2331 last_length: 0,
2332 old_dist: [u32::MAX; 4],
2333 old_dist_ptr: 0,
2334 max_dist3: 0x2001,
2335 nlzb: 0,
2336 nhfb: 0,
2337 l_count: 0,
2338 },
2339 EncodeOptions::default(),
2340 )
2341 .unwrap();
2342
2343 assert!(matches!(
2344 token,
2345 EncodedToken::ShortLz(super::ShortLz { length: 3, .. })
2346 ));
2347 assert!(should_lazy_emit_literal(
2348 input,
2349 10,
2350 &buckets,
2351 token,
2352 0x2001,
2353 EncodeOptions::default()
2354 ));
2355
2356 let packed = unpack15_encode(input).unwrap();
2357 assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2358 }
2359
2360 #[test]
2361 fn cost_aware_selection_prefers_better_bits_per_byte_token() {
2362 let mut input = vec![b'Z'; 40];
2363 input[7] = b'A';
2364 input[8] = b'A';
2365 input[9] = b'A';
2366 input[10] = b'B';
2367 input[39] = b'A';
2368 let pos = input.len();
2369 input.extend_from_slice(b"AAAAAAAAAA");
2370
2371 let mut encoder = Unpack15Encoder::new();
2372 encoder.old_dist = [u32::MAX, u32::MAX, u32::MAX, 33];
2373 let buckets = long_lz_buckets(&input);
2374 let token = encoder
2375 .choose_lz_token(
2376 &input,
2377 pos,
2378 &buckets,
2379 LzPlanState {
2380 last_dist: u32::MAX,
2381 last_length: 0,
2382 old_dist: encoder.old_dist,
2383 old_dist_ptr: encoder.old_dist_ptr,
2384 max_dist3: encoder.max_dist3,
2385 nlzb: encoder.nlzb,
2386 nhfb: encoder.nhfb,
2387 l_count: encoder.l_count,
2388 },
2389 0,
2390 )
2391 .unwrap();
2392
2393 assert_eq!(
2394 token,
2395 EncodedToken::ShortLz(ShortLz {
2396 distance: 1,
2397 length: 10,
2398 })
2399 );
2400 }
2401
2402 #[test]
2403 fn planner_uses_simulated_max_dist3_for_old_distance_candidates() {
2404 let mut state = 0x1234_5678u32;
2405 let mut input = Vec::with_capacity(9004);
2406 for _ in 0..9004 {
2407 state ^= state << 13;
2408 state ^= state >> 17;
2409 state ^= state << 5;
2410 input.push(state as u8);
2411 }
2412 let pos = 9000;
2413 let prefix = [input[0], input[1], input[2]];
2414 input[pos..pos + 3].copy_from_slice(&prefix);
2415 input[pos + 3] = input[3].wrapping_add(1);
2416
2417 let mut encoder = Unpack15Encoder::new();
2418 encoder.max_dist3 = 0x7f00;
2419 let buckets = long_lz_buckets(&input);
2420 let token = encoder.choose_lz_token(
2421 &input,
2422 pos,
2423 &buckets,
2424 LzPlanState {
2425 last_dist: u32::MAX,
2426 last_length: 0,
2427 old_dist: [u32::MAX, u32::MAX, u32::MAX, 9000],
2428 old_dist_ptr: 0,
2429 max_dist3: 0x2001,
2430 nlzb: encoder.nlzb,
2431 nhfb: encoder.nhfb,
2432 l_count: encoder.l_count,
2433 },
2434 0,
2435 );
2436
2437 assert_eq!(token, None);
2438 }
2439
2440 #[test]
2441 fn encoder_round_trips_source_shaped_payload() {
2442 let source = include_bytes!("rar13.rs");
2443 let input = &source[..source.len().min(50_902)];
2444
2445 let packed = unpack15_encode(input).unwrap();
2446 let decoded = unpack15_decode(&packed, input.len()).unwrap();
2447
2448 let first_diff = decoded
2449 .iter()
2450 .zip(input)
2451 .position(|(actual, expected)| actual != expected);
2452 assert_eq!(first_diff, None, "first differing byte in decoded payload");
2453 assert_eq!(decoded, input);
2454 }
2455}
2456
2457#[derive(Clone)]
2458struct BitReader {
2459 input: Vec<u8>,
2460 bit_pos: usize,
2461 final_input: bool,
2462}
2463
2464impl BitReader {
2465 fn new(input: &[u8]) -> Self {
2466 Self {
2467 input: input.to_vec(),
2468 bit_pos: 0,
2469 final_input: false,
2470 }
2471 }
2472
2473 fn new_final(input: &[u8]) -> Self {
2474 Self {
2475 input: input.to_vec(),
2476 bit_pos: 0,
2477 final_input: true,
2478 }
2479 }
2480
2481 fn append(&mut self, input: &[u8]) {
2482 self.compact();
2483 self.input.extend_from_slice(input);
2484 }
2485
2486 fn finish(&mut self) {
2487 self.final_input = true;
2488 }
2489
2490 fn compact(&mut self) {
2491 let bytes = self.bit_pos / 8;
2492 if bytes == 0 {
2493 return;
2494 }
2495 self.input.drain(..bytes);
2496 self.bit_pos -= bytes * 8;
2497 }
2498
2499 fn get_bits(&self) -> Result<u32> {
2500 let mut value = 0u32;
2501 for i in 0..16 {
2502 value <<= 1;
2503 let bit_index = self.bit_pos + i;
2504 let byte = match self.input.get(bit_index / 8).copied() {
2505 Some(byte) => byte,
2506 None if self.final_input => 0,
2507 None => return Err(Error::NeedMoreInput),
2508 };
2509 value |= ((byte >> (7 - (bit_index % 8))) & 1) as u32;
2510 }
2511 Ok(value)
2512 }
2513
2514 fn add_bits(&mut self, count: usize) {
2515 self.bit_pos += count;
2516 }
2517}
2518
2519#[derive(Default)]
2520struct BitWriter {
2521 output: Vec<u8>,
2522 bit_pos: usize,
2523}
2524
2525impl BitWriter {
2526 fn new() -> Self {
2527 Self {
2528 output: Vec::new(),
2529 bit_pos: 0,
2530 }
2531 }
2532
2533 fn write_bits(&mut self, value: u32, count: usize) {
2534 for i in (0..count).rev() {
2535 let bit = ((value >> i) & 1) as u8;
2536 if self.bit_pos.is_multiple_of(8) {
2537 self.output.push(0);
2538 }
2539 if bit != 0 {
2540 let idx = self.output.len() - 1;
2541 self.output[idx] |= 1 << (7 - (self.bit_pos % 8));
2542 }
2543 self.bit_pos += 1;
2544 }
2545 }
2546
2547 fn finish(self) -> Vec<u8> {
2548 self.output
2549 }
2550}