1use crate::{Error, Result};
2use std::io::{Read, Write};
3
4const MATCH_HASH_BUCKETS: usize = 4096;
5const MAX_LONG_MATCH_CANDIDATES: usize = 64;
6const MAX_LONG_LZ_DISTANCE: usize = 0x7fff;
7
8const DEC_L1: &[u16] = &[
9 0x8000, 0xa000, 0xc000, 0xd000, 0xe000, 0xea00, 0xee00, 0xf000, 0xf200, 0xf200, 0xffff,
10];
11const POS_L1: &[u16] = &[0, 0, 0, 2, 3, 5, 7, 11, 16, 20, 24, 32, 32];
12const DEC_L2: &[u16] = &[
13 0xa000, 0xc000, 0xd000, 0xe000, 0xea00, 0xee00, 0xf000, 0xf200, 0xf240, 0xffff,
14];
15const POS_L2: &[u16] = &[0, 0, 0, 0, 5, 7, 9, 13, 18, 22, 26, 34, 36];
16const DEC_HF0: &[u16] = &[
17 0x8000, 0xc000, 0xe000, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xffff,
18];
19const POS_HF0: &[u16] = &[0, 0, 0, 0, 0, 8, 16, 24, 33, 33, 33, 33, 33];
20const DEC_HF1: &[u16] = &[
21 0x2000, 0xc000, 0xe000, 0xf000, 0xf200, 0xf200, 0xf7e0, 0xffff,
22];
23const POS_HF1: &[u16] = &[0, 0, 0, 0, 0, 0, 4, 44, 60, 76, 80, 80, 127];
24const DEC_HF2: &[u16] = &[
25 0x1000, 0x2400, 0x8000, 0xc000, 0xfa00, 0xffff, 0xffff, 0xffff,
26];
27const POS_HF2: &[u16] = &[0, 0, 0, 0, 0, 0, 2, 7, 53, 117, 233, 0, 0];
28const DEC_HF3: &[u16] = &[0x0800, 0x2400, 0xee00, 0xfe80, 0xffff, 0xffff, 0xffff];
29const POS_HF3: &[u16] = &[0, 0, 0, 0, 0, 0, 0, 2, 16, 218, 251, 0, 0];
30const DEC_HF4: &[u16] = &[0xff00, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff];
31const POS_HF4: &[u16] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0];
32
33const SHORT_LEN1: [u8; 16] = [1, 3, 4, 4, 5, 6, 7, 8, 8, 4, 4, 5, 6, 6, 4, 0];
34const SHORT_XOR1: [u8; 15] = [
35 0x00, 0xa0, 0xd0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff, 0xc0, 0x80, 0x90, 0x98, 0x9c, 0xb0,
36];
37const SHORT_LEN2: [u8; 16] = [2, 3, 3, 3, 4, 4, 5, 6, 6, 4, 4, 5, 6, 6, 4, 0];
38const SHORT_XOR2: [u8; 15] = [
39 0x00, 0x40, 0x60, 0xa0, 0xd0, 0xe0, 0xf0, 0xf8, 0xfc, 0xc0, 0x80, 0x90, 0x98, 0x9c, 0xb0,
40];
41
42pub fn unpack15_encode(input: &[u8]) -> Result<Vec<u8>> {
43 unpack15_encode_with_options(input, EncodeOptions::default())
44}
45
46pub fn unpack15_encode_with_options(input: &[u8], options: EncodeOptions) -> Result<Vec<u8>> {
47 if input.is_empty() {
48 return Ok(Vec::new());
49 }
50
51 let mut encoder = Unpack15Encoder::with_options(options);
52 encoder.encode_member(input)
53}
54
55pub fn unpack15_decode(input: &[u8], output_size: usize) -> Result<Vec<u8>> {
56 let mut decoder = Unpack15::new();
57 decoder.decode_member(input, output_size, false)
58}
59
60pub struct Unpack15Encoder {
61 bits: BitWriter,
62 options: EncodeOptions,
63 ch_set: [u16; 256],
66 ch_set_c: [u16; 256],
67 ch_set_b: [u16; 256],
68 n_to_pl: [u8; 256],
69 n_to_pl_b: [u8; 256],
70 n_to_pl_c: [u8; 256],
71 ch_set_a: [u16; 256],
72 avr_plc: u32,
73 avr_plc_b: u32,
74 avr_ln1: u32,
75 avr_ln2: u32,
76 avr_ln3: u32,
77 max_dist3: u32,
78 nhfb: u32,
79 nlzb: u32,
80 num_huf: u32,
81 old_dist: [u32; 4],
82 old_dist_ptr: usize,
83 last_dist: u32,
84 last_length: u32,
85 l_count: u32,
86 #[cfg(test)]
87 stmode_literal_count: usize,
88}
89
90#[derive(Debug, Clone, Copy, PartialEq, Eq)]
91pub struct EncodeOptions {
92 old_distance_tokens: bool,
93 lazy_matching: bool,
94 stmode_literal_runs: bool,
95 max_long_match_distance: usize,
96}
97
98impl EncodeOptions {
99 pub const fn new() -> Self {
100 Self {
101 old_distance_tokens: true,
102 lazy_matching: true,
103 stmode_literal_runs: true,
104 max_long_match_distance: MAX_LONG_LZ_DISTANCE,
105 }
106 }
107
108 pub const fn with_old_distance_tokens(mut self, enabled: bool) -> Self {
109 self.old_distance_tokens = enabled;
110 self
111 }
112
113 pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
114 self.lazy_matching = enabled;
115 self
116 }
117
118 pub const fn with_stmode_literal_runs(mut self, enabled: bool) -> Self {
119 self.stmode_literal_runs = enabled;
120 self
121 }
122
123 pub const fn with_max_long_match_distance(mut self, distance: usize) -> Self {
124 self.max_long_match_distance = distance;
125 self
126 }
127
128 pub const fn old_distance_tokens_enabled(self) -> bool {
129 self.old_distance_tokens
130 }
131}
132
133impl Default for EncodeOptions {
134 fn default() -> Self {
135 Self::new()
136 }
137}
138
139impl Unpack15Encoder {
140 pub fn new() -> Self {
141 Self::with_options(EncodeOptions::default())
142 }
143
144 pub fn with_options(options: EncodeOptions) -> Self {
145 let mut encoder = Self {
146 bits: BitWriter::new(),
147 options,
148 ch_set: [0; 256],
149 ch_set_c: [0; 256],
150 ch_set_b: [0; 256],
151 n_to_pl: [0; 256],
152 n_to_pl_b: [0; 256],
153 n_to_pl_c: [0; 256],
154 ch_set_a: [0; 256],
155 avr_plc: 0x3500,
156 avr_plc_b: 0,
157 avr_ln1: 0,
158 avr_ln2: 0,
159 avr_ln3: 0,
160 max_dist3: 0x2001,
161 nhfb: 0x80,
162 nlzb: 0x80,
163 num_huf: 0,
164 old_dist: [u32::MAX; 4],
165 old_dist_ptr: 0,
166 last_dist: u32::MAX,
167 last_length: 0,
168 l_count: 0,
169 #[cfg(test)]
170 stmode_literal_count: 0,
171 };
172 encoder.init_huff();
173 encoder
174 }
175
176 pub fn encode_literals_only(mut self, input: &[u8]) -> Result<Vec<u8>> {
177 self.encode_literals_only_member(input)
178 }
179
180 fn encode_literals_only_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
181 if input.is_empty() {
182 return Ok(Vec::new());
183 }
184 self.bits = BitWriter::new();
185 let mut pos = 0usize;
186 while pos < input.len() {
187 let mut flags = 0u8;
188 let mut flag_bits = 0usize;
189 let mut payloads = Vec::new();
190 let mut plan_nhfb = self.nhfb;
191 let mut plan_nlzb = self.nlzb;
192 let mut plan_num_huf = self.num_huf;
193 let mut group_enters_stmode = false;
194
195 while flag_bits < 8 && pos < input.len() {
196 let flag = huff_flag_bits(plan_nlzb <= plan_nhfb);
197 if flag_bits + flag.len() > 8 {
198 break;
199 }
200 write_planned_flag_bits(&mut flags, flag_bits, flag);
201 payloads.push(EncodedToken::Literal(input[pos]));
202 flag_bits += flag.len();
203 if flag_bits == 8 && plan_num_huf >= 16 && pos + 1 < input.len() {
204 group_enters_stmode = true;
205 }
206 plan_num_huf += 1;
207 pos += 1;
208 plan_huff_effect(&mut plan_nhfb, &mut plan_nlzb);
209 }
210
211 self.emit_flags_byte(flags)?;
212 self.emit_payloads(payloads)?;
213 if group_enters_stmode {
214 if self.options.stmode_literal_runs {
215 self.emit_stmode_literal_run(input, &[], &mut pos, false)?;
216 }
217 self.emit_stmode_exit()?;
218 }
219 }
220 Ok(std::mem::take(&mut self.bits).finish())
221 }
222
223 pub fn encode_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
224 if input.is_empty() {
225 return Ok(Vec::new());
226 }
227 self.bits = BitWriter::new();
228 let buckets = long_lz_buckets(input);
229 let mut pos = 0usize;
230 while pos < input.len() {
231 let mut flags = 0u8;
232 let mut flag_bits = 0usize;
233 let mut payloads = Vec::new();
234 let mut plan_encoder = self.clone_for_planning();
235 let mut group_enters_stmode = false;
236
237 while flag_bits < 8 && pos < input.len() {
238 let state = plan_encoder.lz_plan_state();
239 if let Some(token) = plan_encoder
240 .choose_lz_token(input, pos, &buckets, state, flag_bits)
241 .filter(|token| {
242 !self.options.lazy_matching
243 || !should_lazy_emit_literal(
244 input,
245 pos,
246 &buckets,
247 *token,
248 state.max_dist3,
249 self.options,
250 )
251 })
252 {
253 let flag = token.flag_bits(state.nlzb, state.nhfb);
254 let next_pos = pos + token.length() as usize;
255 let leaves_unfillable_flag_bit =
256 flag_bits + flag.len() == 7 && next_pos < input.len();
257 if flag_bits + flag.len() <= 8 && !leaves_unfillable_flag_bit {
258 write_planned_flag_bits(&mut flags, flag_bits, flag);
259 flag_bits += flag.len();
260 pos = next_pos;
261 plan_encoder.emit_payloads(vec![token])?;
262 payloads.push(token);
263 continue;
264 }
265 }
266
267 let flag = huff_flag_bits(plan_encoder.nlzb <= plan_encoder.nhfb);
268 if flag_bits + flag.len() > 8 {
269 break;
270 }
271 write_planned_flag_bits(&mut flags, flag_bits, flag);
272 let literal = input[pos];
273 payloads.push(EncodedToken::Literal(input[pos]));
274 flag_bits += flag.len();
275 if flag_bits == 8 && plan_encoder.num_huf >= 16 && pos + 1 < input.len() {
276 group_enters_stmode = true;
277 }
278 pos += 1;
279 plan_encoder.emit_literal(literal)?;
280 }
281
282 self.emit_flags_byte(flags)?;
283 self.emit_payloads(payloads)?;
284 if group_enters_stmode {
285 if self.options.stmode_literal_runs {
286 self.emit_stmode_literal_run(input, &buckets, &mut pos, true)?;
287 }
288 self.emit_stmode_exit()?;
289 }
290 }
291 Ok(std::mem::take(&mut self.bits).finish())
292 }
293
294 fn clone_for_planning(&self) -> Self {
295 Self {
296 bits: BitWriter::new(),
297 options: self.options,
298 ch_set: self.ch_set,
299 ch_set_c: self.ch_set_c,
300 ch_set_b: self.ch_set_b,
301 n_to_pl: self.n_to_pl,
302 n_to_pl_b: self.n_to_pl_b,
303 n_to_pl_c: self.n_to_pl_c,
304 ch_set_a: self.ch_set_a,
305 avr_plc: self.avr_plc,
306 avr_plc_b: self.avr_plc_b,
307 avr_ln1: self.avr_ln1,
308 avr_ln2: self.avr_ln2,
309 avr_ln3: self.avr_ln3,
310 max_dist3: self.max_dist3,
311 nhfb: self.nhfb,
312 nlzb: self.nlzb,
313 num_huf: self.num_huf,
314 old_dist: self.old_dist,
315 old_dist_ptr: self.old_dist_ptr,
316 last_dist: self.last_dist,
317 last_length: self.last_length,
318 l_count: self.l_count,
319 #[cfg(test)]
320 stmode_literal_count: self.stmode_literal_count,
321 }
322 }
323
324 fn lz_plan_state(&self) -> LzPlanState {
325 LzPlanState {
326 last_dist: self.last_dist,
327 last_length: self.last_length,
328 old_dist: self.old_dist,
329 old_dist_ptr: self.old_dist_ptr,
330 max_dist3: self.max_dist3,
331 nlzb: self.nlzb,
332 nhfb: self.nhfb,
333 l_count: self.l_count,
334 }
335 }
336
337 fn choose_lz_token(
338 &self,
339 input: &[u8],
340 pos: usize,
341 buckets: &[Vec<usize>],
342 state: LzPlanState,
343 flag_bits: usize,
344 ) -> Option<EncodedToken> {
345 let candidates = find_lz_tokens(input, pos, buckets, state, self.options);
346 candidates
347 .into_iter()
348 .filter(|token| {
349 let flag_len = token.flag_bits(state.nlzb, state.nhfb).len();
350 let next_pos = pos + token.length() as usize;
351 next_pos == input.len()
352 || (flag_bits + flag_len != 7
353 && !(flag_len == 1 && flag_bits.is_multiple_of(2)))
354 })
355 .filter_map(|token| self.token_bit_cost(token, state).map(|cost| (token, cost)))
356 .min_by(|(left, left_cost), (right, right_cost)| {
357 let left_score = left_cost * 256 / left.length() as usize;
358 let right_score = right_cost * 256 / right.length() as usize;
359 left_score
360 .cmp(&right_score)
361 .then_with(|| right.length().cmp(&left.length()))
362 })
363 .map(|(token, _)| token)
364 }
365
366 fn token_bit_cost(&self, token: EncodedToken, state: LzPlanState) -> Option<usize> {
367 let flag_cost = token.flag_bits(state.nlzb, state.nhfb).len();
368 match token {
369 EncodedToken::Literal(byte) => {
370 let place = self
371 .ch_set
372 .iter()
373 .position(|&value| (value >> 8) as u8 == byte)?;
374 Some(flag_cost + self.literal_place_bit_cost(place)?)
375 }
376 EncodedToken::RepeatLast(_) => {
377 Some(flag_cost + self.repeat_last_bit_cost(state.l_count))
378 }
379 EncodedToken::ShortLz(token) => {
380 let distance_value = token.distance.checked_sub(1)?;
381 let distance_place = self
382 .ch_set_a
383 .iter()
384 .position(|&value| value as u32 == distance_value)?;
385 Some(
386 flag_cost
387 + l_count_break_bit_cost(state.l_count)
388 + self.short_lz_prefix_bit_cost(token.length - 2)?
389 + decode_num_bit_cost(distance_place as u32, 5, DEC_HF2, POS_HF2)?,
390 )
391 }
392 EncodedToken::OldDist(token) => {
393 let length_code = old_dist_lz_length_code(
394 token.length,
395 token.distance,
396 state.max_dist3,
397 token.short_code,
398 )?;
399 Some(
400 flag_cost
401 + l_count_break_bit_cost(state.l_count)
402 + self.short_lz_prefix_bit_cost(token.short_code)?
403 + decode_num_bit_cost(length_code, 2, DEC_L1, POS_L1)?,
404 )
405 }
406 EncodedToken::LongLz(token) => {
407 let length_code = long_lz_length_code_for_distance(token, state.max_dist3)?;
408 let distance_place = self.long_lz_distance_place(token.distance).ok()?;
409 Some(
410 flag_cost
411 + self.long_lz_length_bit_cost(length_code)?
412 + self.long_lz_distance_bit_cost(distance_place)?
413 + 7,
414 )
415 }
416 }
417 }
418
419 fn emit_payloads(&mut self, payloads: Vec<EncodedToken>) -> Result<()> {
420 for payload in payloads {
421 match payload {
422 EncodedToken::Literal(byte) => self.emit_literal(byte)?,
423 EncodedToken::ShortLz(short_lz) => {
424 self.emit_short_lz(short_lz)?;
425 }
426 EncodedToken::RepeatLast(repeat) => {
427 self.emit_repeat_last(repeat)?;
428 }
429 EncodedToken::OldDist(old_lz) => {
430 self.emit_old_dist_lz(old_lz)?;
431 }
432 EncodedToken::LongLz(long_lz) => {
433 self.emit_long_lz(long_lz)?;
434 }
435 }
436 }
437
438 Ok(())
439 }
440
441 fn emit_flags_byte(&mut self, flags: u8) -> Result<()> {
442 let flags_place = self
443 .ch_set_c
444 .iter()
445 .position(|&value| (value >> 8) as u8 == flags)
446 .ok_or(Error::InvalidData("RAR 1.3 flag byte is not encodable"))?;
447 emit_decode_num(&mut self.bits, flags_place as u32, 5, DEC_HF2, POS_HF2)?;
448
449 let mut cur_flags;
450 let mut new_flags_place;
451 loop {
452 cur_flags = self.ch_set_c[flags_place] as u32;
453 new_flags_place = self.n_to_pl_c[(cur_flags & 0xff) as usize] as usize;
454 self.n_to_pl_c[(cur_flags & 0xff) as usize] =
455 self.n_to_pl_c[(cur_flags & 0xff) as usize].wrapping_add(1);
456 cur_flags += 1;
457 if cur_flags & 0xff == 0 {
458 corr_huff(&mut self.ch_set_c, &mut self.n_to_pl_c);
459 } else {
460 break;
461 }
462 }
463
464 self.ch_set_c[flags_place] = self.ch_set_c[new_flags_place];
465 self.ch_set_c[new_flags_place] = cur_flags as u16;
466 Ok(())
467 }
468
469 fn emit_literal(&mut self, byte: u8) -> Result<()> {
470 let byte_place = self
471 .ch_set
472 .iter()
473 .position(|&value| (value >> 8) as u8 == byte)
474 .ok_or(Error::InvalidData("RAR 1.3 literal is not encodable"))?;
475 self.emit_literal_place(byte_place, byte_place, true)
476 }
477
478 fn emit_stmode_literal(&mut self, byte: u8) -> Result<()> {
479 let byte_place = self
480 .ch_set
481 .iter()
482 .position(|&value| (value >> 8) as u8 == byte)
483 .ok_or(Error::InvalidData(
484 "RAR 1.3 stmode literal is not encodable",
485 ))?;
486 #[cfg(test)]
487 {
488 self.stmode_literal_count += 1;
489 }
490 self.emit_literal_place(byte_place + 1, byte_place, false)
491 }
492
493 fn emit_stmode_literal_run(
494 &mut self,
495 input: &[u8],
496 buckets: &[Vec<usize>],
497 pos: &mut usize,
498 allow_lz: bool,
499 ) -> Result<()> {
500 while *pos + 1 < input.len() {
501 if allow_lz
502 && find_lz_token(
503 input,
504 *pos,
505 buckets,
506 LzPlanState {
507 last_dist: self.last_dist,
508 last_length: self.last_length,
509 old_dist: self.old_dist,
510 old_dist_ptr: self.old_dist_ptr,
511 max_dist3: self.max_dist3,
512 nlzb: self.nlzb,
513 nhfb: self.nhfb,
514 l_count: self.l_count,
515 },
516 self.options,
517 )
518 .is_some()
519 {
520 break;
521 }
522 self.emit_stmode_literal(input[*pos])?;
523 *pos += 1;
524 }
525 Ok(())
526 }
527
528 fn emit_literal_place(
529 &mut self,
530 encoded_place: usize,
531 decoded_place: usize,
532 update_num_huf: bool,
533 ) -> Result<()> {
534 if encoded_place > self.ch_set.len() || decoded_place >= self.ch_set.len() {
535 return Err(Error::InvalidData("RAR 1.3 literal is not encodable"));
536 }
537
538 let (start_pos, dec_tab, pos_tab) = if self.avr_plc > 0x75ff {
539 (8, DEC_HF4, POS_HF4)
540 } else if self.avr_plc > 0x5dff {
541 (6, DEC_HF3, POS_HF3)
542 } else if self.avr_plc > 0x35ff {
543 (5, DEC_HF2, POS_HF2)
544 } else if self.avr_plc > 0x0dff {
545 (5, DEC_HF1, POS_HF1)
546 } else {
547 (4, DEC_HF0, POS_HF0)
548 };
549 emit_decode_num(
550 &mut self.bits,
551 encoded_place as u32,
552 start_pos,
553 dec_tab,
554 pos_tab,
555 )?;
556
557 self.avr_plc += decoded_place as u32;
558 self.avr_plc -= self.avr_plc >> 8;
559 self.nhfb += 16;
560 if self.nhfb > 0xff {
561 self.nhfb = 0x90;
562 self.nlzb >>= 1;
563 }
564 if update_num_huf {
565 self.num_huf += 1;
566 }
567
568 let idx = decoded_place;
569 let mut cur_byte;
570 let mut new_byte_place;
571 loop {
572 cur_byte = self.ch_set[idx] as u32;
573 new_byte_place = self.n_to_pl[(cur_byte & 0xff) as usize] as usize;
574 self.n_to_pl[(cur_byte & 0xff) as usize] =
575 self.n_to_pl[(cur_byte & 0xff) as usize].wrapping_add(1);
576 cur_byte += 1;
577 if cur_byte & 0xff > 0xa1 {
578 corr_huff(&mut self.ch_set, &mut self.n_to_pl);
579 } else {
580 break;
581 }
582 }
583
584 self.ch_set[idx] = self.ch_set[new_byte_place];
585 self.ch_set[new_byte_place] = cur_byte as u16;
586 Ok(())
587 }
588
589 fn emit_short_lz(&mut self, short_lz: ShortLz) -> Result<()> {
590 self.num_huf = 0;
591 if self.l_count == 2 {
592 self.bits.write_bits(0, 1);
593 self.l_count = 0;
594 }
595 let length_place = short_lz.length - 2;
596 self.emit_short_lz_code(length_place as usize)?;
597 self.l_count = 0;
598
599 self.avr_ln1 += length_place;
600 self.avr_ln1 -= self.avr_ln1 >> 4;
601
602 let distance_value = short_lz.distance - 1;
603 let distance_place = self
604 .ch_set_a
605 .iter()
606 .position(|&value| value as u32 == distance_value)
607 .ok_or(Error::InvalidData(
608 "RAR 1.3 ShortLZ distance is not encodable",
609 ))?;
610 emit_decode_num(&mut self.bits, distance_place as u32, 5, DEC_HF2, POS_HF2)?;
611 if distance_place > 0 {
612 let last_distance = self.ch_set_a[distance_place - 1];
613 self.ch_set_a[distance_place] = last_distance;
614 self.ch_set_a[distance_place - 1] = distance_value as u16;
615 }
616 self.remember_match(short_lz.distance, short_lz.length);
617 Ok(())
618 }
619
620 fn emit_repeat_last(&mut self, repeat: RepeatLastLz) -> Result<()> {
621 if self.last_dist != repeat.distance || self.last_length != repeat.length {
622 return Err(Error::InvalidData(
623 "RAR 1.3 repeat-last state is not encodable",
624 ));
625 }
626 self.num_huf = 0;
627 if self.l_count == 2 {
628 self.bits.write_bits(1, 1);
629 } else {
630 self.emit_short_lz_code(9)?;
631 self.l_count += 1;
632 }
633 Ok(())
634 }
635
636 fn emit_old_dist_lz(&mut self, old_lz: OldDistLz) -> Result<()> {
637 self.num_huf = 0;
638 if self.l_count == 2 {
639 self.bits.write_bits(0, 1);
640 self.l_count = 0;
641 }
642 self.emit_short_lz_code(old_lz.short_code as usize)?;
643 self.l_count = 0;
644
645 let expected_distance = self.old_dist[(self
646 .old_dist_ptr
647 .wrapping_sub((old_lz.short_code - 9) as usize))
648 & 3];
649 if expected_distance != old_lz.distance {
650 return Err(Error::InvalidData(
651 "RAR 1.3 old-distance state is not encodable",
652 ));
653 }
654 let length_code = old_dist_lz_length_code(
655 old_lz.length,
656 old_lz.distance,
657 self.max_dist3,
658 old_lz.short_code,
659 )
660 .ok_or(Error::InvalidData(
661 "RAR 1.3 old-distance length is not encodable",
662 ))?;
663 emit_decode_num(&mut self.bits, length_code, 2, DEC_L1, POS_L1)?;
664 self.remember_match(old_lz.distance, old_lz.length);
665 Ok(())
666 }
667
668 fn emit_short_lz_code(&mut self, code: usize) -> Result<()> {
669 let (code_len, code_byte) = if self.avr_ln1 < 37 {
670 (self.short_len1(code), SHORT_XOR1[code])
671 } else {
672 (self.short_len2(code), SHORT_XOR2[code])
673 };
674 self.bits
675 .write_bits((code_byte >> (8 - code_len)) as u32, code_len as usize);
676 Ok(())
677 }
678
679 fn short_lz_prefix_bit_cost(&self, code: u32) -> Option<usize> {
680 let code = usize::try_from(code).ok()?;
681 if code >= SHORT_XOR1.len() {
682 return None;
683 }
684 Some(if self.avr_ln1 < 37 {
685 self.short_len1(code)
686 } else {
687 self.short_len2(code)
688 } as usize)
689 }
690
691 fn repeat_last_bit_cost(&self, l_count: u32) -> usize {
692 if l_count == 2 {
693 1
694 } else {
695 self.short_lz_prefix_bit_cost(9)
696 .expect("repeat-last code is encodable")
697 }
698 }
699
700 fn emit_long_lz(&mut self, long_lz: LongLz) -> Result<()> {
701 self.num_huf = 0;
702 self.nlzb += 16;
703 if self.nlzb > 0xff {
704 self.nlzb = 0x90;
705 self.nhfb >>= 1;
706 }
707 let old_avr2 = self.avr_ln2;
708
709 let length_code = self.long_lz_length_code(long_lz).ok_or(Error::InvalidData(
710 "RAR 1.3 LongLZ match length is not encodable for distance",
711 ))?;
712 emit_long_lz_length(&mut self.bits, self.avr_ln2, length_code)?;
713 self.avr_ln2 += length_code;
714 self.avr_ln2 -= self.avr_ln2 >> 5;
715
716 let distance_place = self.long_lz_distance_place(long_lz.distance)?;
717 let (start_pos, dec_tab, pos_tab) = if self.avr_plc_b > 0x28ff {
718 (5, DEC_HF2, POS_HF2)
719 } else if self.avr_plc_b > 0x06ff {
720 (5, DEC_HF1, POS_HF1)
721 } else {
722 (4, DEC_HF0, POS_HF0)
723 };
724 emit_decode_num(
725 &mut self.bits,
726 distance_place as u32,
727 start_pos,
728 dec_tab,
729 pos_tab,
730 )?;
731 self.avr_plc_b += distance_place as u32;
732 self.avr_plc_b -= self.avr_plc_b >> 8;
733
734 let idx = distance_place;
735 let mut distance;
736 let mut new_distance_place;
737 loop {
738 distance = self.ch_set_b[idx] as u32;
739 new_distance_place = self.n_to_pl_b[(distance & 0xff) as usize] as usize;
740 self.n_to_pl_b[(distance & 0xff) as usize] =
741 self.n_to_pl_b[(distance & 0xff) as usize].wrapping_add(1);
742 distance += 1;
743 if distance & 0xff == 0 {
744 corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
745 } else {
746 break;
747 }
748 }
749
750 self.ch_set_b[idx] = self.ch_set_b[new_distance_place];
751 self.ch_set_b[new_distance_place] = distance as u16;
752
753 let low_byte = ((long_lz.distance << 1) & 0xff) as u8;
754 self.bits.write_bits((low_byte >> 1) as u32, 7);
755
756 let old_avr3 = self.avr_ln3;
757 if length_code != 1 && length_code != 4 {
758 if length_code == 0 && long_lz.distance <= self.max_dist3 {
759 self.avr_ln3 += 1;
760 self.avr_ln3 -= self.avr_ln3 >> 8;
761 } else if self.avr_ln3 > 0 {
762 self.avr_ln3 -= 1;
763 }
764 }
765 if old_avr3 > 0xb0 || (self.avr_plc >= 0x2a00 && old_avr2 < 0x40) {
766 self.max_dist3 = 0x7f00;
767 } else {
768 self.max_dist3 = 0x2001;
769 }
770
771 self.remember_match(long_lz.distance, long_lz.length);
772 Ok(())
773 }
774
775 fn long_lz_length_code(&self, long_lz: LongLz) -> Option<u32> {
776 long_lz_length_code_for_distance(long_lz, self.max_dist3)
777 }
778
779 fn long_lz_distance_place(&self, target_distance: u32) -> Result<usize> {
780 let wanted_high = ((target_distance << 1) & 0xff00) as u16;
781 self.ch_set_b
782 .iter()
783 .position(|&value| value & 0xff00 == wanted_high)
784 .ok_or(Error::InvalidData(
785 "RAR 1.3 LongLZ distance is not encodable",
786 ))
787 }
788
789 fn literal_place_bit_cost(&self, place: usize) -> Option<usize> {
790 if self.avr_plc > 0x75ff {
791 decode_num_bit_cost(place as u32, 8, DEC_HF4, POS_HF4)
792 } else if self.avr_plc > 0x5dff {
793 decode_num_bit_cost(place as u32, 6, DEC_HF3, POS_HF3)
794 } else if self.avr_plc > 0x35ff {
795 decode_num_bit_cost(place as u32, 5, DEC_HF2, POS_HF2)
796 } else if self.avr_plc > 0x0dff {
797 decode_num_bit_cost(place as u32, 5, DEC_HF1, POS_HF1)
798 } else {
799 decode_num_bit_cost(place as u32, 4, DEC_HF0, POS_HF0)
800 }
801 }
802
803 fn long_lz_length_bit_cost(&self, length_code: u32) -> Option<usize> {
804 if self.avr_ln2 >= 122 {
805 decode_num_bit_cost(length_code, 3, DEC_L2, POS_L2)
806 } else if self.avr_ln2 >= 64 {
807 decode_num_bit_cost(length_code, 2, DEC_L1, POS_L1)
808 } else if length_code <= 7 {
809 Some(length_code as usize + 1)
810 } else if length_code < 0x100 {
811 Some(16)
812 } else {
813 None
814 }
815 }
816
817 fn long_lz_distance_bit_cost(&self, distance_place: usize) -> Option<usize> {
818 if self.avr_plc_b > 0x28ff {
819 decode_num_bit_cost(distance_place as u32, 5, DEC_HF2, POS_HF2)
820 } else if self.avr_plc_b > 0x06ff {
821 decode_num_bit_cost(distance_place as u32, 5, DEC_HF1, POS_HF1)
822 } else {
823 decode_num_bit_cost(distance_place as u32, 4, DEC_HF0, POS_HF0)
824 }
825 }
826
827 fn emit_stmode_exit(&mut self) -> Result<()> {
828 let (start_pos, dec_tab, pos_tab) = if self.avr_plc > 0x75ff {
829 (8, DEC_HF4, POS_HF4)
830 } else if self.avr_plc > 0x5dff {
831 (6, DEC_HF3, POS_HF3)
832 } else if self.avr_plc > 0x35ff {
833 (5, DEC_HF2, POS_HF2)
834 } else if self.avr_plc > 0x0dff {
835 (5, DEC_HF1, POS_HF1)
836 } else {
837 (4, DEC_HF0, POS_HF0)
838 };
839 emit_decode_num(&mut self.bits, 0, start_pos, dec_tab, pos_tab)?;
840 self.bits.write_bits(1, 1);
841 self.num_huf = 0;
842 Ok(())
843 }
844
845 fn init_huff(&mut self) {
846 for i in 0..256 {
847 self.ch_set[i] = (i as u16) << 8;
848 self.ch_set_c[i] = (0u8.wrapping_sub(i as u8) as u16) << 8;
849 self.ch_set_b[i] = (i as u16) << 8;
850 }
851 self.n_to_pl = [0; 256];
852 self.n_to_pl_b = [0; 256];
853 self.n_to_pl_c = [0; 256];
854 for i in 0..256 {
855 self.ch_set_a[i] = i as u16;
856 }
857 corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
858 }
859
860 fn remember_match(&mut self, distance: u32, length: u32) {
861 self.old_dist[self.old_dist_ptr] = distance;
862 self.old_dist_ptr = (self.old_dist_ptr + 1) & 3;
863 self.last_length = length;
864 self.last_dist = distance;
865 }
866
867 fn short_len1(&self, pos: usize) -> u8 {
868 if pos == 1 {
869 3
870 } else {
871 SHORT_LEN1[pos]
872 }
873 }
874
875 fn short_len2(&self, pos: usize) -> u8 {
876 if pos == 3 {
877 3
878 } else {
879 SHORT_LEN2[pos]
880 }
881 }
882}
883
884impl Default for Unpack15Encoder {
885 fn default() -> Self {
886 Self::new()
887 }
888}
889
890#[derive(Debug, Clone, Copy)]
891struct LzPlanState {
892 last_dist: u32,
893 last_length: u32,
894 old_dist: [u32; 4],
895 old_dist_ptr: usize,
896 max_dist3: u32,
897 nlzb: u32,
898 nhfb: u32,
899 l_count: u32,
900}
901
902#[derive(Debug, Clone, Copy, PartialEq, Eq)]
903enum EncodedToken {
904 Literal(u8),
905 ShortLz(ShortLz),
906 RepeatLast(RepeatLastLz),
907 OldDist(OldDistLz),
908 LongLz(LongLz),
909}
910
911impl EncodedToken {
912 fn length(self) -> u32 {
913 match self {
914 Self::Literal(_) => 1,
915 Self::ShortLz(token) => token.length,
916 Self::RepeatLast(token) => token.length,
917 Self::OldDist(token) => token.length,
918 Self::LongLz(token) => token.length,
919 }
920 }
921
922 fn flag_bits(self, nlzb: u32, nhfb: u32) -> &'static [bool] {
923 match self {
924 Self::Literal(_) => huff_flag_bits(nlzb <= nhfb),
925 Self::LongLz(_) => long_lz_flag_bits(nlzb > nhfb),
926 Self::ShortLz(_) | Self::RepeatLast(_) | Self::OldDist(_) => &[false, false],
927 }
928 }
929}
930
931#[derive(Debug, Clone, Copy, PartialEq, Eq)]
932struct ShortLz {
933 pub distance: u32,
934 pub length: u32,
935}
936
937#[derive(Debug, Clone, Copy, PartialEq, Eq)]
938struct RepeatLastLz {
939 pub distance: u32,
940 pub length: u32,
941}
942
943#[derive(Debug, Clone, Copy, PartialEq, Eq)]
944struct OldDistLz {
945 pub distance: u32,
946 pub length: u32,
947 pub short_code: u32,
948}
949
950#[derive(Debug, Clone, Copy, PartialEq, Eq)]
951pub struct LongLz {
952 pub distance: u32,
953 pub length: u32,
954}
955
956fn huff_flag_bits(prefer_huff_on_one: bool) -> &'static [bool] {
957 if prefer_huff_on_one {
958 &[true]
959 } else {
960 &[false, true]
961 }
962}
963
964fn long_lz_flag_bits(prefer_long_lz_on_one: bool) -> &'static [bool] {
965 if prefer_long_lz_on_one {
966 &[true]
967 } else {
968 &[false, true]
969 }
970}
971
972fn write_planned_flag_bits(flags: &mut u8, start: usize, bits: &[bool]) {
973 for (offset, &bit) in bits.iter().enumerate() {
974 if bit {
975 *flags |= 1 << (7 - start - offset);
976 }
977 }
978}
979
980fn plan_huff_effect(nhfb: &mut u32, nlzb: &mut u32) {
981 *nhfb += 16;
982 if *nhfb > 0xff {
983 *nhfb = 0x90;
984 *nlzb >>= 1;
985 }
986}
987
988fn l_count_break_bit_cost(l_count: u32) -> usize {
989 usize::from(l_count == 2)
990}
991
992fn find_lz_token(
993 input: &[u8],
994 pos: usize,
995 buckets: &[Vec<usize>],
996 state: LzPlanState,
997 options: EncodeOptions,
998) -> Option<EncodedToken> {
999 find_lz_tokens(input, pos, buckets, state, options)
1000 .into_iter()
1001 .next()
1002}
1003
1004fn find_lz_tokens(
1005 input: &[u8],
1006 pos: usize,
1007 buckets: &[Vec<usize>],
1008 state: LzPlanState,
1009 options: EncodeOptions,
1010) -> Vec<EncodedToken> {
1011 let mut tokens = Vec::with_capacity(4);
1012 if let Some(repeat) = find_repeat_last_lz(input, pos, state.last_dist, state.last_length) {
1013 tokens.push(EncodedToken::RepeatLast(repeat));
1014 }
1015 if options.old_distance_tokens {
1016 if let Some(old_lz) = find_old_dist_lz(
1017 input,
1018 pos,
1019 state.old_dist,
1020 state.old_dist_ptr,
1021 state.max_dist3,
1022 ) {
1023 tokens.push(EncodedToken::OldDist(old_lz));
1024 }
1025 }
1026 if let Some(short_lz) = find_short_lz(input, pos) {
1027 tokens.push(EncodedToken::ShortLz(short_lz));
1028 }
1029 if let Some(long_lz) = find_long_lz_with_buckets(
1030 input,
1031 pos,
1032 options.max_long_match_distance,
1033 buckets,
1034 MAX_LONG_MATCH_CANDIDATES,
1035 )
1036 .filter(|long_lz| long_lz_length_code_for_distance(*long_lz, state.max_dist3).is_some())
1037 {
1038 tokens.push(EncodedToken::LongLz(long_lz));
1039 }
1040 tokens
1041}
1042
1043fn should_lazy_emit_literal(
1044 input: &[u8],
1045 pos: usize,
1046 buckets: &[Vec<usize>],
1047 current: EncodedToken,
1048 max_dist3: u32,
1049 options: EncodeOptions,
1050) -> bool {
1051 if !matches!(current, EncodedToken::ShortLz(_) | EncodedToken::LongLz(_))
1052 || pos + 1 >= input.len()
1053 {
1054 return false;
1055 }
1056
1057 let next = find_lz_token(
1058 input,
1059 pos + 1,
1060 buckets,
1061 LzPlanState {
1062 last_dist: u32::MAX,
1063 last_length: 0,
1064 old_dist: [u32::MAX; 4],
1065 old_dist_ptr: 0,
1066 max_dist3,
1067 nlzb: 0,
1068 nhfb: 0,
1069 l_count: 0,
1070 },
1071 options,
1072 );
1073 next.is_some_and(|next| {
1074 matches!(next, EncodedToken::ShortLz(_) | EncodedToken::LongLz(_))
1075 && next.length() >= current.length() + 2
1076 })
1077}
1078
1079fn find_short_lz(input: &[u8], pos: usize) -> Option<ShortLz> {
1080 if pos < 2 {
1081 return None;
1082 }
1083
1084 let max_distance = pos.min(256);
1085 let mut best = ShortLz {
1086 distance: 0,
1087 length: 0,
1088 };
1089 for distance in 1..=max_distance {
1090 let mut length = 0usize;
1091 while length < 10
1092 && pos + length < input.len()
1093 && input[pos + length] == input[pos + length - distance]
1094 {
1095 length += 1;
1096 }
1097 if length >= 3
1098 && (length > best.length as usize
1099 || (length == best.length as usize && distance < best.distance as usize))
1100 {
1101 best = ShortLz {
1102 distance: distance as u32,
1103 length: length as u32,
1104 };
1105 }
1106 }
1107
1108 (best.length >= 3).then_some(best)
1109}
1110
1111fn find_repeat_last_lz(
1112 input: &[u8],
1113 pos: usize,
1114 last_dist: u32,
1115 last_length: u32,
1116) -> Option<RepeatLastLz> {
1117 if last_dist == u32::MAX || last_dist == 0 || last_length == 0 {
1118 return None;
1119 }
1120 let distance = usize::try_from(last_dist).ok()?;
1121 let length = usize::try_from(last_length).ok()?;
1122 if distance > pos || pos.checked_add(length)? > input.len() {
1123 return None;
1124 }
1125 let matches = (0..length).all(|offset| input[pos + offset] == input[pos + offset - distance]);
1126 matches.then_some(RepeatLastLz {
1127 distance: last_dist,
1128 length: last_length,
1129 })
1130}
1131
1132fn find_old_dist_lz(
1133 input: &[u8],
1134 pos: usize,
1135 old_dist: [u32; 4],
1136 old_dist_ptr: usize,
1137 _max_dist3: u32,
1138) -> Option<OldDistLz> {
1139 let mut best = OldDistLz {
1140 distance: 0,
1141 length: 0,
1142 short_code: 0,
1143 };
1144 for short_code in 10..=13 {
1145 let distance = old_dist[(old_dist_ptr.wrapping_sub((short_code - 9) as usize)) & 3];
1146 if distance == u32::MAX || distance == 0 {
1147 continue;
1148 }
1149 let Ok(distance_usize) = usize::try_from(distance) else {
1150 continue;
1151 };
1152 if distance_usize > pos {
1153 continue;
1154 }
1155 let mut length = 0usize;
1156 while length < 258
1157 && pos + length < input.len()
1158 && input[pos + length] == input[pos + length - distance_usize]
1159 {
1160 length += 1;
1161 }
1162 if length >= 3
1163 && old_dist_lz_is_encodable(length as u32, distance, short_code)
1164 && length > best.length as usize
1165 {
1166 best = OldDistLz {
1167 distance,
1168 length: length as u32,
1169 short_code,
1170 };
1171 }
1172 }
1173
1174 (best.length >= 3).then_some(best)
1175}
1176
1177fn old_dist_lz_is_encodable(length: u32, distance: u32, short_code: u32) -> bool {
1178 old_dist_lz_length_code(length, distance, 0x2001, short_code).is_some()
1179 && old_dist_lz_length_code(length, distance, 0x7f00, short_code).is_some()
1180}
1181
1182fn old_dist_lz_length_code(
1183 length: u32,
1184 distance: u32,
1185 max_dist3: u32,
1186 short_code: u32,
1187) -> Option<u32> {
1188 let decoded_bonus = u32::from(distance > 256) + u32::from(distance >= max_dist3);
1189 let length_code = length.checked_sub(2 + decoded_bonus)?;
1190 if short_code == 10 && length_code == 0xff {
1191 return None;
1192 }
1193 Some(length_code)
1194}
1195
1196fn long_lz_length_code_for_distance(long_lz: LongLz, max_dist3: u32) -> Option<u32> {
1197 let decoded_bonus =
1198 u32::from(long_lz.distance >= max_dist3) + u32::from(long_lz.distance <= 256);
1199 long_lz.length.checked_sub(3 + decoded_bonus)
1200}
1201
1202pub fn find_long_lz(input: &[u8], pos: usize, max_match_distance: usize) -> Option<LongLz> {
1203 if pos < 257 {
1204 return None;
1205 }
1206
1207 let max_distance = pos.min(MAX_LONG_LZ_DISTANCE).min(max_match_distance);
1208 if max_distance < 257 {
1209 return None;
1210 }
1211 let mut best = LongLz {
1212 distance: 0,
1213 length: 0,
1214 };
1215 for distance in 257..=max_distance {
1216 let mut length = 0usize;
1217 while length < 258
1218 && pos + length < input.len()
1219 && input[pos + length] == input[pos + length - distance]
1220 {
1221 length += 1;
1222 }
1223 if length >= 3 && length > best.length as usize {
1224 best = LongLz {
1225 distance: distance as u32,
1226 length: length as u32,
1227 };
1228 }
1229 }
1230
1231 (best.length >= 3).then_some(best)
1232}
1233
1234fn find_long_lz_with_buckets(
1235 input: &[u8],
1236 pos: usize,
1237 max_match_distance: usize,
1238 buckets: &[Vec<usize>],
1239 max_candidates: usize,
1240) -> Option<LongLz> {
1241 if pos < 257 || pos + 2 >= input.len() || max_candidates == 0 {
1242 return None;
1243 }
1244
1245 let max_distance = pos.min(MAX_LONG_LZ_DISTANCE).min(max_match_distance);
1246 if max_distance < 257 {
1247 return None;
1248 }
1249 let max_length = (input.len() - pos).min(258);
1250 let mut best = LongLz {
1251 distance: 0,
1252 length: 0,
1253 };
1254 let mut checked = 0usize;
1255 for &candidate in buckets[match_hash(input, pos)].iter().rev() {
1256 if candidate >= pos {
1257 continue;
1258 }
1259 let distance = pos - candidate;
1260 if distance > max_distance {
1261 break;
1262 }
1263 if distance < 257 {
1264 continue;
1265 }
1266 checked += 1;
1267 let mut length = 0usize;
1268 while length < max_length && input[pos + length] == input[pos + length - distance] {
1269 length += 1;
1270 }
1271 if length >= 3
1272 && (length > best.length as usize
1273 || (length == best.length as usize && distance < best.distance as usize))
1274 {
1275 best = LongLz {
1276 distance: distance as u32,
1277 length: length as u32,
1278 };
1279 if length == max_length {
1280 break;
1281 }
1282 }
1283 if checked >= max_candidates {
1284 break;
1285 }
1286 }
1287
1288 (best.length >= 3).then_some(best)
1289}
1290
1291fn long_lz_buckets(input: &[u8]) -> Vec<Vec<usize>> {
1292 let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1293 for pos in 0..input.len().saturating_sub(2) {
1294 buckets[match_hash(input, pos)].push(pos);
1295 }
1296 buckets
1297}
1298
1299fn match_hash(input: &[u8], pos: usize) -> usize {
1300 let value =
1301 ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
1302 value & (MATCH_HASH_BUCKETS - 1)
1303}
1304
1305fn emit_long_lz_length(bits: &mut BitWriter, avr_ln2: u32, length_code: u32) -> Result<()> {
1306 if avr_ln2 >= 122 {
1307 return emit_decode_num(bits, length_code, 3, DEC_L2, POS_L2);
1308 }
1309 if avr_ln2 >= 64 {
1310 return emit_decode_num(bits, length_code, 2, DEC_L1, POS_L1);
1311 }
1312 if length_code <= 7 {
1313 bits.write_bits(1, length_code as usize + 1);
1314 return Ok(());
1315 }
1316 if length_code < 0x100 {
1317 bits.write_bits(length_code, 16);
1318 return Ok(());
1319 }
1320 Err(Error::InvalidData(
1321 "RAR 1.3 LongLZ encoder length is not encodable",
1322 ))
1323}
1324
1325fn emit_decode_num(
1326 bits: &mut BitWriter,
1327 target: u32,
1328 start_pos: u32,
1329 dec_tab: &[u16],
1330 pos_tab: &[u16],
1331) -> Result<()> {
1332 if let Some((code, len)) = encode_decode_num_prefix(target, start_pos, dec_tab, pos_tab) {
1333 bits.write_bits(code, len);
1334 return Ok(());
1335 }
1336 Err(Error::InvalidData(
1337 "RAR 1.3 DecodeNum value is not encodable",
1338 ))
1339}
1340
1341fn decode_num_bit_cost(
1342 target: u32,
1343 start_pos: u32,
1344 dec_tab: &[u16],
1345 pos_tab: &[u16],
1346) -> Option<usize> {
1347 encode_decode_num_prefix(target, start_pos, dec_tab, pos_tab).map(|(_, len)| len)
1348}
1349
1350fn encode_decode_num_prefix(
1351 target: u32,
1352 start_pos: u32,
1353 dec_tab: &[u16],
1354 pos_tab: &[u16],
1355) -> Option<(u32, usize)> {
1356 let end = 16.min(pos_tab.len().saturating_sub(1));
1357 for (len, &base) in pos_tab
1358 .iter()
1359 .enumerate()
1360 .take(end + 1)
1361 .skip(start_pos as usize)
1362 {
1363 let dec_index = len.checked_sub(start_pos as usize)?;
1364 let upper = u32::from(*dec_tab.get(dec_index)?);
1365 let previous = if dec_index == 0 {
1366 0
1367 } else {
1368 u32::from(dec_tab[dec_index - 1])
1369 };
1370 let max_num = upper.checked_sub(1)? & !0xf;
1371 if max_num < previous {
1372 continue;
1373 }
1374 let base = u32::from(base);
1375 let max_target = ((max_num - previous) >> (16 - len)) + base;
1376 if target >= base && target <= max_target {
1377 let num = previous + ((target - base) << (16 - len));
1378 return Some((num >> (16 - len), len));
1379 }
1380 }
1381 None
1382}
1383
1384#[cfg(test)]
1385fn decode_num_prefix_is_stable(
1386 code: u32,
1387 len: usize,
1388 target: u32,
1389 start_pos: u32,
1390 dec_tab: &[u16],
1391 pos_tab: &[u16],
1392) -> bool {
1393 let relevant_tail_bits = 16usize.saturating_sub(len + 4);
1394 for tail in 0..(1u32 << relevant_tail_bits) {
1395 let bit_field = (code << (16 - len)) | (tail << 4);
1396 let (decoded, consumed) = simulate_decode_num(bit_field, start_pos, dec_tab, pos_tab);
1397 if decoded != target || consumed != len {
1398 return false;
1399 }
1400 }
1401 true
1402}
1403
1404#[cfg(test)]
1405fn simulate_decode_num(
1406 bit_field: u32,
1407 mut start_pos: u32,
1408 dec_tab: &[u16],
1409 pos_tab: &[u16],
1410) -> (u32, usize) {
1411 let num = bit_field & 0xfff0;
1412 let mut i = 0usize;
1413 while dec_tab[i] as u32 <= num {
1414 start_pos += 1;
1415 i += 1;
1416 }
1417 (
1418 ((num - if i > 0 { dec_tab[i - 1] as u32 } else { 0 }) >> (16 - start_pos))
1419 + pos_tab[start_pos as usize] as u32,
1420 start_pos as usize,
1421 )
1422}
1423
1424#[derive(Clone)]
1425pub struct Unpack15 {
1426 bits: BitReader,
1427 target: usize,
1428 output_written: usize,
1429 window: [u8; 0x10000],
1430 unp_ptr: usize,
1431 prev_ptr: usize,
1432 first_win_done: bool,
1433 ch_set: [u16; 256],
1436 ch_set_a: [u16; 256],
1437 ch_set_b: [u16; 256],
1438 ch_set_c: [u16; 256],
1439 n_to_pl: [u8; 256],
1440 n_to_pl_b: [u8; 256],
1441 n_to_pl_c: [u8; 256],
1442 avr_plc: u32,
1443 avr_plc_b: u32,
1444 avr_ln1: u32,
1445 avr_ln2: u32,
1446 avr_ln3: u32,
1447 max_dist3: u32,
1448 nhfb: u32,
1449 nlzb: u32,
1450 num_huf: u32,
1451 buf60: u32,
1452 st_mode: bool,
1453 l_count: u32,
1454 flag_buf: u32,
1455 flags_cnt: i32,
1456 old_dist: [u32; 4],
1457 old_dist_ptr: usize,
1458 last_dist: u32,
1459 last_length: u32,
1460}
1461
1462impl Unpack15 {
1463 pub fn new() -> Self {
1464 Self {
1465 bits: BitReader::new(&[]),
1466 target: 0,
1467 output_written: 0,
1468 window: [0; 0x10000],
1469 unp_ptr: 0,
1470 prev_ptr: 0,
1471 first_win_done: false,
1472 ch_set: [0; 256],
1473 ch_set_a: [0; 256],
1474 ch_set_b: [0; 256],
1475 ch_set_c: [0; 256],
1476 n_to_pl: [0; 256],
1477 n_to_pl_b: [0; 256],
1478 n_to_pl_c: [0; 256],
1479 avr_plc: 0x3500,
1480 avr_plc_b: 0,
1481 avr_ln1: 0,
1482 avr_ln2: 0,
1483 avr_ln3: 0,
1484 max_dist3: 0x2001,
1485 nhfb: 0x80,
1486 nlzb: 0x80,
1487 num_huf: 0,
1488 buf60: 0,
1489 st_mode: false,
1490 l_count: 0,
1491 flag_buf: 0,
1492 flags_cnt: 0,
1493 old_dist: [u32::MAX; 4],
1494 old_dist_ptr: 0,
1495 last_dist: u32::MAX,
1496 last_length: 0,
1497 }
1498 }
1499
1500 pub fn decode_member(&mut self, input: &[u8], target: usize, solid: bool) -> Result<Vec<u8>> {
1501 let mut output = Vec::with_capacity(target);
1502 self.decode_member_to(input, target, solid, &mut output)?;
1503 Ok(output)
1504 }
1505
1506 pub fn decode_member_to(
1507 &mut self,
1508 input: &[u8],
1509 target: usize,
1510 solid: bool,
1511 out: &mut impl Write,
1512 ) -> Result<()> {
1513 self.init_member(target, solid);
1514 self.bits = BitReader::new_final(input);
1515 self.decode_loop(out).map_err(|error| match error {
1516 Error::NeedMoreInput => Error::InvalidData("RAR 1.3 bitstream is truncated"),
1517 error => error,
1518 })
1519 }
1520
1521 pub fn decode_member_from_reader(
1522 &mut self,
1523 input: &mut impl Read,
1524 target: usize,
1525 solid: bool,
1526 out: &mut impl Write,
1527 ) -> Result<()> {
1528 const OUTPUT_CHUNK: usize = 64 * 1024;
1529
1530 self.init_member(target, solid);
1531 self.bits = BitReader::new(&[]);
1532 let mut packed = Vec::new();
1533 input
1534 .read_to_end(&mut packed)
1535 .map_err(|_| Error::InvalidData("RAR 1.3 input read failed"))?;
1536 self.bits.append(&packed);
1537 self.bits.finish();
1538
1539 while self.output_written < self.target {
1540 let chunk_target = self
1541 .output_written
1542 .saturating_add(OUTPUT_CHUNK)
1543 .min(self.target);
1544 let mut chunk = Vec::with_capacity(chunk_target - self.output_written);
1545 self.decode_loop_until(chunk_target, &mut chunk)
1546 .map_err(|error| match error {
1547 Error::NeedMoreInput => Error::InvalidData("RAR 1.3 bitstream is truncated"),
1548 error => error,
1549 })?;
1550 out.write_all(&chunk)
1551 .map_err(|_| Error::InvalidData("RAR 1.3 output write failed"))?;
1552 }
1553 Ok(())
1554 }
1555
1556 fn init_member(&mut self, target: usize, solid: bool) {
1557 self.target = target;
1558 self.output_written = 0;
1559 self.flags_cnt = -2;
1560 self.flag_buf = 0;
1561 self.st_mode = false;
1562 self.l_count = 0;
1563
1564 if !solid {
1565 self.reset_non_solid();
1566 }
1567 }
1568
1569 fn decode_loop(&mut self, out: &mut impl Write) -> Result<()> {
1570 if self.target == 0 {
1571 return Ok(());
1572 }
1573
1574 self.decode_loop_until(self.target, out)
1575 }
1576
1577 fn decode_loop_until(&mut self, target: usize, out: &mut impl Write) -> Result<()> {
1578 while self.output_written < target {
1579 self.decode_step(out)?;
1580 }
1581
1582 Ok(())
1583 }
1584
1585 fn decode_step(&mut self, out: &mut impl Write) -> Result<()> {
1586 if self.flags_cnt == -2 {
1587 self.get_flags_buf()?;
1588 self.flags_cnt = 8;
1589 }
1590
1591 self.unp_ptr &= 0xffff;
1592 self.first_win_done |= self.prev_ptr > self.unp_ptr;
1593 self.prev_ptr = self.unp_ptr;
1594
1595 if self.st_mode {
1596 return self.huff_decode(out);
1597 }
1598
1599 self.flags_cnt -= 1;
1600 if self.flags_cnt < 0 {
1601 self.get_flags_buf()?;
1602 self.flags_cnt = 7;
1603 }
1604
1605 if self.flag_buf & 0x80 != 0 {
1606 self.flag_buf = (self.flag_buf << 1) & 0xff;
1607 if self.nlzb > self.nhfb {
1608 self.long_lz(out)
1609 } else {
1610 self.huff_decode(out)
1611 }
1612 } else {
1613 self.flag_buf = (self.flag_buf << 1) & 0xff;
1614 self.flags_cnt -= 1;
1615 if self.flags_cnt < 0 {
1616 self.get_flags_buf()?;
1617 self.flags_cnt = 7;
1618 }
1619 if self.flag_buf & 0x80 != 0 {
1620 self.flag_buf = (self.flag_buf << 1) & 0xff;
1621 if self.nlzb > self.nhfb {
1622 self.huff_decode(out)
1623 } else {
1624 self.long_lz(out)
1625 }
1626 } else {
1627 self.flag_buf = (self.flag_buf << 1) & 0xff;
1628 self.short_lz(out)
1629 }
1630 }
1631 }
1632
1633 fn reset_non_solid(&mut self) {
1634 self.window = [0; 0x10000];
1635 self.unp_ptr = 0;
1636 self.prev_ptr = 0;
1637 self.first_win_done = false;
1638 self.avr_plc_b = 0;
1639 self.avr_ln1 = 0;
1640 self.avr_ln2 = 0;
1641 self.avr_ln3 = 0;
1642 self.num_huf = 0;
1643 self.buf60 = 0;
1644 self.avr_plc = 0x3500;
1645 self.max_dist3 = 0x2001;
1646 self.nhfb = 0x80;
1647 self.nlzb = 0x80;
1648 self.old_dist = [u32::MAX; 4];
1649 self.old_dist_ptr = 0;
1650 self.last_dist = u32::MAX;
1651 self.last_length = 0;
1652 self.init_huff();
1653 }
1654
1655 fn short_lz(&mut self, out: &mut impl Write) -> Result<()> {
1656 self.num_huf = 0;
1657 let mut bit_field = self.bits.get_bits()?;
1658 if self.l_count == 2 {
1659 self.bits.add_bits(1);
1660 if bit_field >= 0x8000 {
1661 self.copy_string(self.last_dist, self.last_length, out)?;
1662 return Ok(());
1663 }
1664 bit_field = (bit_field << 1) & 0xffff;
1665 self.l_count = 0;
1666 }
1667
1668 let bit_byte = (bit_field >> 8) as u8;
1669 let mut length = 0usize;
1670 if self.avr_ln1 < 37 {
1671 while length < SHORT_XOR1.len() {
1672 let short_len = self.short_len1(length);
1673 let mask = (!(0xffu16 >> short_len)) as u8;
1674 if ((bit_byte ^ SHORT_XOR1[length]) & mask) == 0 {
1675 break;
1676 }
1677 length += 1;
1678 }
1679 self.bits.add_bits(self.short_len1(length) as usize);
1680 } else {
1681 while length < SHORT_XOR2.len() {
1682 let short_len = self.short_len2(length);
1683 let mask = (!(0xffu16 >> short_len)) as u8;
1684 if ((bit_byte ^ SHORT_XOR2[length]) & mask) == 0 {
1685 break;
1686 }
1687 length += 1;
1688 }
1689 self.bits.add_bits(self.short_len2(length) as usize);
1690 }
1691
1692 let mut length = length as u32;
1693 if length >= 9 {
1694 if length == 9 {
1695 self.l_count += 1;
1696 self.copy_string(self.last_dist, self.last_length, out)?;
1697 return Ok(());
1698 }
1699 if length == 14 {
1700 self.l_count = 0;
1701 length = self.decode_num(self.bits.get_bits()?, 3, DEC_L2, POS_L2) + 5;
1702 let distance = (self.bits.get_bits()? >> 1) | 0x8000;
1703 self.bits.add_bits(15);
1704 self.last_length = length;
1705 self.last_dist = distance;
1706 self.copy_string(distance, length, out)?;
1707 return Ok(());
1708 }
1709
1710 self.l_count = 0;
1711 let save_length = length;
1712 let distance =
1713 self.old_dist[(self.old_dist_ptr.wrapping_sub((length - 9) as usize)) & 3];
1714 length = self.decode_num(self.bits.get_bits()?, 2, DEC_L1, POS_L1) + 2;
1715 if length == 0x101 && save_length == 10 {
1716 self.buf60 ^= 1;
1717 return Ok(());
1718 }
1719 if distance > 256 {
1720 length += 1;
1721 }
1722 if distance >= self.max_dist3 {
1723 length += 1;
1724 }
1725
1726 self.remember_match(distance, length);
1727 self.copy_string(distance, length, out)?;
1728 return Ok(());
1729 }
1730
1731 self.l_count = 0;
1732 self.avr_ln1 += length;
1733 self.avr_ln1 -= self.avr_ln1 >> 4;
1734
1735 let distance_place =
1736 (self.decode_num(self.bits.get_bits()?, 5, DEC_HF2, POS_HF2) & 0xff) as usize;
1737 let mut distance = self.ch_set_a[distance_place] as u32;
1738 if distance_place > 0 {
1739 let last_distance = self.ch_set_a[distance_place - 1];
1740 self.ch_set_a[distance_place] = last_distance;
1741 self.ch_set_a[distance_place - 1] = distance as u16;
1742 }
1743 length += 2;
1744 distance += 1;
1745 self.remember_match(distance, length);
1746 self.copy_string(distance, length, out)
1747 }
1748
1749 fn long_lz(&mut self, out: &mut impl Write) -> Result<()> {
1750 self.num_huf = 0;
1751 self.nlzb += 16;
1752 if self.nlzb > 0xff {
1753 self.nlzb = 0x90;
1754 self.nhfb >>= 1;
1755 }
1756 let old_avr2 = self.avr_ln2;
1757
1758 let bit_field = self.bits.get_bits()?;
1759 let mut length = if self.avr_ln2 >= 122 {
1760 self.decode_num(bit_field, 3, DEC_L2, POS_L2)
1761 } else if self.avr_ln2 >= 64 {
1762 self.decode_num(bit_field, 2, DEC_L1, POS_L1)
1763 } else if bit_field < 0x100 {
1764 self.bits.add_bits(16);
1765 bit_field
1766 } else {
1767 let mut length = 0u32;
1768 while ((bit_field << length) & 0x8000) == 0 {
1769 length += 1;
1770 }
1771 self.bits.add_bits((length + 1) as usize);
1772 length
1773 };
1774
1775 self.avr_ln2 += length;
1776 self.avr_ln2 -= self.avr_ln2 >> 5;
1777
1778 let bit_field = self.bits.get_bits()?;
1779 let distance_place = if self.avr_plc_b > 0x28ff {
1780 self.decode_num(bit_field, 5, DEC_HF2, POS_HF2)
1781 } else if self.avr_plc_b > 0x06ff {
1782 self.decode_num(bit_field, 5, DEC_HF1, POS_HF1)
1783 } else {
1784 self.decode_num(bit_field, 4, DEC_HF0, POS_HF0)
1785 };
1786
1787 self.avr_plc_b += distance_place;
1788 self.avr_plc_b -= self.avr_plc_b >> 8;
1789
1790 let idx = (distance_place & 0xff) as usize;
1791 let mut distance;
1792 let mut new_distance_place;
1793 loop {
1794 distance = self.ch_set_b[idx] as u32;
1795 new_distance_place = self.n_to_pl_b[(distance & 0xff) as usize] as usize;
1796 self.n_to_pl_b[(distance & 0xff) as usize] =
1797 self.n_to_pl_b[(distance & 0xff) as usize].wrapping_add(1);
1798 distance += 1;
1799 if distance & 0xff == 0 {
1800 corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
1801 } else {
1802 break;
1803 }
1804 }
1805
1806 self.ch_set_b[idx] = self.ch_set_b[new_distance_place];
1807 self.ch_set_b[new_distance_place] = distance as u16;
1808
1809 distance = ((distance & 0xff00) | (self.bits.get_bits()? >> 8)) >> 1;
1810 self.bits.add_bits(7);
1811
1812 let old_avr3 = self.avr_ln3;
1813 if length != 1 && length != 4 {
1814 if length == 0 && distance <= self.max_dist3 {
1815 self.avr_ln3 += 1;
1816 self.avr_ln3 -= self.avr_ln3 >> 8;
1817 } else if self.avr_ln3 > 0 {
1818 self.avr_ln3 -= 1;
1819 }
1820 }
1821 length += 3;
1822 if distance >= self.max_dist3 {
1823 length += 1;
1824 }
1825 if distance <= 256 {
1826 length += 8;
1827 }
1828 if old_avr3 > 0xb0 || (self.avr_plc >= 0x2a00 && old_avr2 < 0x40) {
1829 self.max_dist3 = 0x7f00;
1830 } else {
1831 self.max_dist3 = 0x2001;
1832 }
1833
1834 self.remember_match(distance, length);
1835 self.copy_string(distance, length, out)
1836 }
1837
1838 fn huff_decode(&mut self, out: &mut impl Write) -> Result<()> {
1839 let bit_field = self.bits.get_bits()?;
1840
1841 let mut byte_place = if self.avr_plc > 0x75ff {
1842 self.decode_num(bit_field, 8, DEC_HF4, POS_HF4)
1843 } else if self.avr_plc > 0x5dff {
1844 self.decode_num(bit_field, 6, DEC_HF3, POS_HF3)
1845 } else if self.avr_plc > 0x35ff {
1846 self.decode_num(bit_field, 5, DEC_HF2, POS_HF2)
1847 } else if self.avr_plc > 0x0dff {
1848 self.decode_num(bit_field, 5, DEC_HF1, POS_HF1)
1849 } else {
1850 self.decode_num(bit_field, 4, DEC_HF0, POS_HF0)
1851 } & 0xff;
1852
1853 if self.st_mode {
1854 if byte_place == 0 && bit_field > 0x0fff {
1855 byte_place = 0x100;
1856 }
1857 if byte_place == 0 {
1858 let bit_field = self.bits.get_bits()?;
1859 self.bits.add_bits(1);
1860 if bit_field & 0x8000 != 0 {
1861 self.num_huf = 0;
1862 self.st_mode = false;
1863 return Ok(());
1864 }
1865
1866 let length = if bit_field & 0x4000 != 0 { 4 } else { 3 };
1867 self.bits.add_bits(1);
1868 let mut distance = self.decode_num(self.bits.get_bits()?, 5, DEC_HF2, POS_HF2);
1869 distance = (distance << 5) | (self.bits.get_bits()? >> 11);
1870 self.bits.add_bits(5);
1871 self.copy_string(distance, length, out)?;
1872 return Ok(());
1873 }
1874 byte_place -= 1;
1875 } else {
1876 if self.num_huf >= 16 && self.flags_cnt == 0 {
1877 self.st_mode = true;
1878 }
1879 self.num_huf += 1;
1880 }
1881
1882 self.avr_plc += byte_place;
1883 self.avr_plc -= self.avr_plc >> 8;
1884 self.nhfb += 16;
1885 if self.nhfb > 0xff {
1886 self.nhfb = 0x90;
1887 self.nlzb >>= 1;
1888 }
1889
1890 let byte = (self.ch_set[byte_place as usize] >> 8) as u8;
1891 self.put_byte(byte, out)?;
1892
1893 let idx = byte_place as usize;
1894 let mut cur_byte;
1895 let mut new_byte_place;
1896 loop {
1897 cur_byte = self.ch_set[idx] as u32;
1898 new_byte_place = self.n_to_pl[(cur_byte & 0xff) as usize] as usize;
1899 self.n_to_pl[(cur_byte & 0xff) as usize] =
1900 self.n_to_pl[(cur_byte & 0xff) as usize].wrapping_add(1);
1901 cur_byte += 1;
1902 if cur_byte & 0xff > 0xa1 {
1903 corr_huff(&mut self.ch_set, &mut self.n_to_pl);
1904 } else {
1905 break;
1906 }
1907 }
1908
1909 self.ch_set[idx] = self.ch_set[new_byte_place];
1910 self.ch_set[new_byte_place] = cur_byte as u16;
1911 Ok(())
1912 }
1913
1914 fn get_flags_buf(&mut self) -> Result<()> {
1915 let flags_place = self.decode_num(self.bits.get_bits()?, 5, DEC_HF2, POS_HF2) as usize;
1916 if flags_place >= self.ch_set_c.len() {
1917 return Ok(());
1918 }
1919
1920 let mut flags;
1921 let mut new_flags_place;
1922 loop {
1923 flags = self.ch_set_c[flags_place] as u32;
1924 new_flags_place = self.n_to_pl_c[(flags & 0xff) as usize] as usize;
1925 self.n_to_pl_c[(flags & 0xff) as usize] =
1926 self.n_to_pl_c[(flags & 0xff) as usize].wrapping_add(1);
1927 self.flag_buf = flags >> 8;
1928 flags += 1;
1929 if flags & 0xff == 0 {
1930 corr_huff(&mut self.ch_set_c, &mut self.n_to_pl_c);
1931 } else {
1932 break;
1933 }
1934 }
1935
1936 self.ch_set_c[flags_place] = self.ch_set_c[new_flags_place];
1937 self.ch_set_c[new_flags_place] = flags as u16;
1938 Ok(())
1939 }
1940
1941 fn decode_num(
1942 &mut self,
1943 num: u32,
1944 mut start_pos: u32,
1945 dec_tab: &[u16],
1946 pos_tab: &[u16],
1947 ) -> u32 {
1948 let num = num & 0xfff0;
1949 let mut i = 0usize;
1950 while dec_tab[i] as u32 <= num {
1951 start_pos += 1;
1952 i += 1;
1953 }
1954 self.bits.add_bits(start_pos as usize);
1955 ((num - if i > 0 { dec_tab[i - 1] as u32 } else { 0 }) >> (16 - start_pos))
1956 + pos_tab[start_pos as usize] as u32
1957 }
1958
1959 fn copy_string(&mut self, distance: u32, length: u32, out: &mut impl Write) -> Result<()> {
1960 if self.output_written + length as usize > self.target {
1961 return Err(Error::InvalidData("RAR 1.3 match exceeds output size"));
1962 }
1963
1964 if (!self.first_win_done && distance as usize > self.unp_ptr)
1965 || distance as usize > 0x10000
1966 || distance == 0
1967 {
1968 for _ in 0..length {
1969 self.put_byte(0, out)?;
1970 }
1971 } else {
1972 for _ in 0..length {
1973 let byte = self.window[(self.unp_ptr.wrapping_sub(distance as usize)) & 0xffff];
1974 self.put_byte(byte, out)?;
1975 }
1976 }
1977 Ok(())
1978 }
1979
1980 fn put_byte(&mut self, byte: u8, out: &mut impl Write) -> Result<()> {
1981 if self.output_written >= self.target {
1982 return Err(Error::InvalidData("RAR 1.3 literal exceeds output size"));
1983 }
1984 self.window[self.unp_ptr] = byte;
1985 self.unp_ptr = (self.unp_ptr + 1) & 0xffff;
1986 out.write_all(&[byte])
1987 .map_err(|_| Error::InvalidData("RAR 1.3 output write failed"))?;
1988 self.output_written += 1;
1989 Ok(())
1990 }
1991
1992 fn remember_match(&mut self, distance: u32, length: u32) {
1993 self.old_dist[self.old_dist_ptr] = distance;
1994 self.old_dist_ptr = (self.old_dist_ptr + 1) & 3;
1995 self.last_length = length;
1996 self.last_dist = distance;
1997 }
1998
1999 fn short_len1(&self, pos: usize) -> u32 {
2000 if pos == 1 {
2001 self.buf60 + 3
2002 } else {
2003 SHORT_LEN1[pos] as u32
2004 }
2005 }
2006
2007 fn short_len2(&self, pos: usize) -> u32 {
2008 if pos == 3 {
2009 self.buf60 + 3
2010 } else {
2011 SHORT_LEN2[pos] as u32
2012 }
2013 }
2014
2015 fn init_huff(&mut self) {
2016 for i in 0..256 {
2017 self.ch_set[i] = (i as u16) << 8;
2018 self.ch_set_b[i] = (i as u16) << 8;
2019 self.ch_set_a[i] = i as u16;
2020 self.ch_set_c[i] = (0u8.wrapping_sub(i as u8) as u16) << 8;
2021 }
2022 self.n_to_pl = [0; 256];
2023 self.n_to_pl_b = [0; 256];
2024 self.n_to_pl_c = [0; 256];
2025 corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
2026 }
2027}
2028
2029impl Default for Unpack15 {
2030 fn default() -> Self {
2031 Self::new()
2032 }
2033}
2034
2035fn corr_huff(char_set: &mut [u16; 256], num_to_place: &mut [u8; 256]) {
2036 let mut pos = 0usize;
2037 for rank in (0..=7).rev() {
2038 for _ in 0..32 {
2039 char_set[pos] = (char_set[pos] & !0xff) | rank;
2040 pos += 1;
2041 }
2042 }
2043 *num_to_place = [0; 256];
2044 for rank in (0..=6).rev() {
2045 num_to_place[rank] = ((7 - rank) * 32) as u8;
2046 }
2047}
2048
2049#[cfg(test)]
2050mod tests {
2051 use super::{
2052 decode_num_bit_cost, decode_num_prefix_is_stable, find_long_lz, find_lz_token,
2053 find_old_dist_lz, long_lz_buckets, should_lazy_emit_literal, unpack15_decode,
2054 unpack15_encode, EncodeOptions, EncodedToken, LongLz, LzPlanState, OldDistLz, ShortLz,
2055 Unpack15, Unpack15Encoder, DEC_HF0, DEC_HF1, DEC_HF2, DEC_HF3, DEC_HF4, DEC_L1, DEC_L2,
2056 POS_HF0, POS_HF1, POS_HF2, POS_HF3, POS_HF4, POS_L1, POS_L2,
2057 };
2058
2059 fn brute_decode_num_bit_cost(
2060 target: u32,
2061 start_pos: u32,
2062 dec_tab: &[u16],
2063 pos_tab: &[u16],
2064 ) -> Option<usize> {
2065 for len in start_pos as usize..=16 {
2066 for code in 0..(1u32 << len) {
2067 if decode_num_prefix_is_stable(code, len, target, start_pos, dec_tab, pos_tab) {
2068 return Some(len);
2069 }
2070 }
2071 }
2072 None
2073 }
2074
2075 #[test]
2076 fn decode_member_from_reader_accepts_incremental_input() {
2077 struct TinyReader<'a> {
2078 input: &'a [u8],
2079 }
2080
2081 impl std::io::Read for TinyReader<'_> {
2082 fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
2083 if self.input.is_empty() {
2084 return Ok(0);
2085 }
2086 let len = self.input.len().min(out.len()).min(2);
2087 out[..len].copy_from_slice(&self.input[..len]);
2088 self.input = &self.input[len..];
2089 Ok(len)
2090 }
2091 }
2092
2093 let expected = b"RAR 1.4 incremental input fixture\n".repeat(32);
2094 let packed = unpack15_encode(&expected).unwrap();
2095 assert_eq!(unpack15_decode(&packed, expected.len()).unwrap(), expected);
2096
2097 let mut reader = TinyReader { input: &packed };
2098 let mut decoder = Unpack15::new();
2099 let mut output = Vec::new();
2100 decoder
2101 .decode_member_from_reader(&mut reader, expected.len(), false, &mut output)
2102 .unwrap();
2103
2104 assert_eq!(output, expected);
2105 }
2106
2107 #[test]
2108 fn decode_num_bit_cost_matches_prefix_search_at_table_boundaries() {
2109 let tables = [
2110 (4, DEC_HF0, POS_HF0),
2111 (5, DEC_HF1, POS_HF1),
2112 (5, DEC_HF2, POS_HF2),
2113 (6, DEC_HF3, POS_HF3),
2114 (8, DEC_HF4, POS_HF4),
2115 (2, DEC_L1, POS_L1),
2116 (3, DEC_L2, POS_L2),
2117 ];
2118 let targets = [0, 1, 2, 3, 7, 8, 16, 24, 32, 33, 53, 117, 233, 255, 256];
2119
2120 for (start_pos, dec_tab, pos_tab) in tables {
2121 for target in targets {
2122 assert_eq!(
2123 decode_num_bit_cost(target, start_pos, dec_tab, pos_tab),
2124 brute_decode_num_bit_cost(target, start_pos, dec_tab, pos_tab),
2125 "target {target}, start_pos {start_pos}"
2126 );
2127 }
2128 }
2129 }
2130
2131 #[test]
2132 fn encoder_emits_rar15_very_long_lz_matches() {
2133 let mut input: Vec<_> = (0u8..=255).cycle().take(300).collect();
2134 input.extend_from_within(..258);
2135
2136 assert_eq!(
2137 find_long_lz(&input, 300, 0x8000),
2138 Some(LongLz {
2139 distance: 300,
2140 length: 258
2141 })
2142 );
2143 let packed = unpack15_encode(&input).unwrap();
2144
2145 assert!(
2146 packed.len() < 330,
2147 "very-long LongLZ should encode a 258-byte repeat compactly, got {} bytes",
2148 packed.len()
2149 );
2150 assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2151 }
2152
2153 #[test]
2154 fn encoder_adjusts_rar15_long_lz_length_for_far_distance_bonus() {
2155 let mut input: Vec<_> = (0..9000).map(|index| (index * 73 + 19) as u8).collect();
2156 input.extend_from_within(..10);
2157
2158 let packed = unpack15_encode(&input).unwrap();
2159
2160 assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2161 }
2162
2163 #[test]
2164 fn encoder_reuses_rar15_repeat_last_token() {
2165 let input = b"abcdefghijklmnop".repeat(64);
2166 let packed = unpack15_encode(&input).unwrap();
2167
2168 assert!(
2169 packed.len() < 100,
2170 "repeat-last tokens should keep a simple repeated pattern compact, got {} bytes",
2171 packed.len()
2172 );
2173 assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2174 }
2175
2176 #[test]
2177 fn old_distance_finder_maps_ring_entries_to_short_lz_codes() {
2178 let mut input: Vec<_> = (0..80).map(|index| (index * 37 + 11) as u8).collect();
2179 let pos = input.len();
2180 input.extend_from_within(pos - 33..pos - 13);
2181
2182 assert_eq!(
2183 find_old_dist_lz(&input, pos, [11, 22, 33, 44], 0, 0x2001),
2184 Some(OldDistLz {
2185 distance: 33,
2186 length: 20,
2187 short_code: 11,
2188 })
2189 );
2190 }
2191
2192 #[test]
2193 fn old_distance_finder_rejects_buf60_toggle_encoding() {
2194 let mut input = b"abcd".repeat(128);
2195 let pos = input.len();
2196 input.extend((0..257).map(|index| b"abcd"[index % 4]));
2197
2198 assert_eq!(
2199 find_old_dist_lz(&input, pos, [u32::MAX, 4, u32::MAX, u32::MAX], 2, 0x2001),
2200 None,
2201 "short-code 10 with decoded base length 0x101 is the Buf60 toggle, not a match"
2202 );
2203 }
2204
2205 #[test]
2206 fn planner_emits_safe_old_distance_token() {
2207 let mut input: Vec<_> = (0..80).map(|index| (index * 37 + 11) as u8).collect();
2208 let pos = input.len();
2209 input.extend_from_within(pos - 33..pos - 13);
2210
2211 let encoder = Unpack15Encoder::new();
2212 let buckets = long_lz_buckets(&input);
2213 let token = encoder
2214 .choose_lz_token(
2215 &input,
2216 pos,
2217 &buckets,
2218 LzPlanState {
2219 last_dist: u32::MAX,
2220 last_length: 0,
2221 old_dist: [11, 22, 33, 44],
2222 old_dist_ptr: 0,
2223 max_dist3: 0x2001,
2224 nlzb: encoder.nlzb,
2225 nhfb: encoder.nhfb,
2226 l_count: encoder.l_count,
2227 },
2228 0,
2229 )
2230 .expect("old-distance candidate should be selected");
2231
2232 assert_eq!(
2233 token,
2234 EncodedToken::OldDist(OldDistLz {
2235 distance: 33,
2236 length: 20,
2237 short_code: 11,
2238 })
2239 );
2240 }
2241
2242 #[test]
2243 fn encoder_exits_stmode_when_literal_runs_trigger_decoder_mode() {
2244 let input: Vec<_> = (0..96).map(|index| (index * 73 + 19) as u8).collect();
2245 let packed = unpack15_encode(&input).unwrap();
2246
2247 assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2248 }
2249
2250 #[test]
2251 fn encoder_emits_stmode_literals_for_long_literal_runs() {
2252 let input: Vec<_> = (0..128).map(|index| (index * 73 + 19) as u8).collect();
2253 let mut encoder = Unpack15Encoder::new();
2254 let packed = encoder.encode_member(&input).unwrap();
2255
2256 assert!(
2257 encoder.stmode_literal_count > 0,
2258 "long literal runs should use stmode literals before exiting stmode"
2259 );
2260 assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2261 }
2262
2263 #[test]
2264 fn encoder_options_can_disable_stmode_literal_runs() {
2265 let input: Vec<_> = (0..128).map(|index| (index * 73 + 19) as u8).collect();
2266 let mut encoder =
2267 Unpack15Encoder::with_options(EncodeOptions::new().with_stmode_literal_runs(false));
2268 let packed = encoder.encode_member(&input).unwrap();
2269
2270 assert_eq!(encoder.stmode_literal_count, 0);
2271 assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2272 }
2273
2274 #[test]
2275 fn encoder_options_bound_long_lz_search_distance() {
2276 let mut input: Vec<_> = (0u8..=255).cycle().take(300).collect();
2277 input.extend_from_within(..64);
2278
2279 assert_eq!(find_long_lz(&input, 300, 256), None);
2280 assert_eq!(
2281 find_long_lz(&input, 300, 0x8000),
2282 Some(LongLz {
2283 distance: 300,
2284 length: 64
2285 })
2286 );
2287 }
2288
2289 #[test]
2290 fn long_lz_search_rejects_unencodable_32k_distance() {
2291 let mut input = Vec::with_capacity(0x8000 + 64);
2292 let mut state = 0x1234_5678u32;
2293 for _ in 0..0x8000 + 64 {
2294 state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
2295 input.push((state >> 24) as u8);
2296 }
2297 let repeated = input[..64].to_vec();
2298 input[0x8000..0x8000 + 64].copy_from_slice(&repeated);
2299
2300 let token = find_long_lz(&input, 0x8000, 0x8000);
2301
2302 assert_ne!(token.map(|token| token.distance), Some(0x8000));
2303 }
2304
2305 #[test]
2306 fn lazy_match_prefers_longer_next_position_match() {
2307 let input = b"abcXbcQRSTabcQRSTUV";
2308 let buckets = long_lz_buckets(input);
2309 let token = find_lz_token(
2310 input,
2311 10,
2312 &buckets,
2313 LzPlanState {
2314 last_dist: u32::MAX,
2315 last_length: 0,
2316 old_dist: [u32::MAX; 4],
2317 old_dist_ptr: 0,
2318 max_dist3: 0x2001,
2319 nlzb: 0,
2320 nhfb: 0,
2321 l_count: 0,
2322 },
2323 EncodeOptions::default(),
2324 )
2325 .unwrap();
2326
2327 assert!(matches!(
2328 token,
2329 EncodedToken::ShortLz(super::ShortLz { length: 3, .. })
2330 ));
2331 assert!(should_lazy_emit_literal(
2332 input,
2333 10,
2334 &buckets,
2335 token,
2336 0x2001,
2337 EncodeOptions::default()
2338 ));
2339
2340 let packed = unpack15_encode(input).unwrap();
2341 assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2342 }
2343
2344 #[test]
2345 fn cost_aware_selection_prefers_better_bits_per_byte_token() {
2346 let mut input = vec![b'Z'; 40];
2347 input[7] = b'A';
2348 input[8] = b'A';
2349 input[9] = b'A';
2350 input[10] = b'B';
2351 input[39] = b'A';
2352 let pos = input.len();
2353 input.extend_from_slice(b"AAAAAAAAAA");
2354
2355 let mut encoder = Unpack15Encoder::new();
2356 encoder.old_dist = [u32::MAX, u32::MAX, u32::MAX, 33];
2357 let buckets = long_lz_buckets(&input);
2358 let token = encoder
2359 .choose_lz_token(
2360 &input,
2361 pos,
2362 &buckets,
2363 LzPlanState {
2364 last_dist: u32::MAX,
2365 last_length: 0,
2366 old_dist: encoder.old_dist,
2367 old_dist_ptr: encoder.old_dist_ptr,
2368 max_dist3: encoder.max_dist3,
2369 nlzb: encoder.nlzb,
2370 nhfb: encoder.nhfb,
2371 l_count: encoder.l_count,
2372 },
2373 0,
2374 )
2375 .unwrap();
2376
2377 assert_eq!(
2378 token,
2379 EncodedToken::ShortLz(ShortLz {
2380 distance: 1,
2381 length: 10,
2382 })
2383 );
2384 }
2385
2386 #[test]
2387 fn planner_uses_simulated_max_dist3_for_old_distance_candidates() {
2388 let mut state = 0x1234_5678u32;
2389 let mut input = Vec::with_capacity(9004);
2390 for _ in 0..9004 {
2391 state ^= state << 13;
2392 state ^= state >> 17;
2393 state ^= state << 5;
2394 input.push(state as u8);
2395 }
2396 let pos = 9000;
2397 let prefix = [input[0], input[1], input[2]];
2398 input[pos..pos + 3].copy_from_slice(&prefix);
2399 input[pos + 3] = input[3].wrapping_add(1);
2400
2401 let mut encoder = Unpack15Encoder::new();
2402 encoder.max_dist3 = 0x7f00;
2403 let buckets = long_lz_buckets(&input);
2404 let token = encoder.choose_lz_token(
2405 &input,
2406 pos,
2407 &buckets,
2408 LzPlanState {
2409 last_dist: u32::MAX,
2410 last_length: 0,
2411 old_dist: [u32::MAX, u32::MAX, u32::MAX, 9000],
2412 old_dist_ptr: 0,
2413 max_dist3: 0x2001,
2414 nlzb: encoder.nlzb,
2415 nhfb: encoder.nhfb,
2416 l_count: encoder.l_count,
2417 },
2418 0,
2419 );
2420
2421 assert_eq!(token, None);
2422 }
2423
2424 #[test]
2425 fn encoder_round_trips_source_shaped_payload() {
2426 let source = include_bytes!("rar13.rs");
2427 let input = &source[..source.len().min(50_902)];
2428
2429 let packed = unpack15_encode(input).unwrap();
2430 let decoded = unpack15_decode(&packed, input.len()).unwrap();
2431
2432 let first_diff = decoded
2433 .iter()
2434 .zip(input)
2435 .position(|(actual, expected)| actual != expected);
2436 assert_eq!(first_diff, None, "first differing byte in decoded payload");
2437 assert_eq!(decoded, input);
2438 }
2439}
2440
2441#[derive(Clone)]
2442struct BitReader {
2443 input: Vec<u8>,
2444 bit_pos: usize,
2445 final_input: bool,
2446}
2447
2448impl BitReader {
2449 fn new(input: &[u8]) -> Self {
2450 Self {
2451 input: input.to_vec(),
2452 bit_pos: 0,
2453 final_input: false,
2454 }
2455 }
2456
2457 fn new_final(input: &[u8]) -> Self {
2458 Self {
2459 input: input.to_vec(),
2460 bit_pos: 0,
2461 final_input: true,
2462 }
2463 }
2464
2465 fn append(&mut self, input: &[u8]) {
2466 self.compact();
2467 self.input.extend_from_slice(input);
2468 }
2469
2470 fn finish(&mut self) {
2471 self.final_input = true;
2472 }
2473
2474 fn compact(&mut self) {
2475 let bytes = self.bit_pos / 8;
2476 if bytes == 0 {
2477 return;
2478 }
2479 self.input.drain(..bytes);
2480 self.bit_pos -= bytes * 8;
2481 }
2482
2483 fn get_bits(&self) -> Result<u32> {
2484 let mut value = 0u32;
2485 for i in 0..16 {
2486 value <<= 1;
2487 let bit_index = self.bit_pos + i;
2488 let byte = match self.input.get(bit_index / 8).copied() {
2489 Some(byte) => byte,
2490 None if self.final_input => 0,
2491 None => return Err(Error::NeedMoreInput),
2492 };
2493 value |= ((byte >> (7 - (bit_index % 8))) & 1) as u32;
2494 }
2495 Ok(value)
2496 }
2497
2498 fn add_bits(&mut self, count: usize) {
2499 self.bit_pos += count;
2500 }
2501}
2502
2503#[derive(Default)]
2504struct BitWriter {
2505 output: Vec<u8>,
2506 bit_pos: usize,
2507}
2508
2509impl BitWriter {
2510 fn new() -> Self {
2511 Self {
2512 output: Vec::new(),
2513 bit_pos: 0,
2514 }
2515 }
2516
2517 fn write_bits(&mut self, value: u32, count: usize) {
2518 for i in (0..count).rev() {
2519 let bit = ((value >> i) & 1) as u8;
2520 if self.bit_pos.is_multiple_of(8) {
2521 self.output.push(0);
2522 }
2523 if bit != 0 {
2524 let idx = self.output.len() - 1;
2525 self.output[idx] |= 1 << (7 - (self.bit_pos % 8));
2526 }
2527 self.bit_pos += 1;
2528 }
2529 }
2530
2531 fn finish(self) -> Vec<u8> {
2532 self.output
2533 }
2534}