Skip to main content

rars_codec/
rar13.rs

1use crate::{Error, Result};
2use std::io::{Read, Write};
3
4const MATCH_HASH_BUCKETS: usize = 4096;
5const MAX_LONG_MATCH_CANDIDATES: usize = 64;
6const MAX_LONG_LZ_DISTANCE: usize = 0x7fff;
7
8const DEC_L1: &[u16] = &[
9    0x8000, 0xa000, 0xc000, 0xd000, 0xe000, 0xea00, 0xee00, 0xf000, 0xf200, 0xf200, 0xffff,
10];
11const POS_L1: &[u16] = &[0, 0, 0, 2, 3, 5, 7, 11, 16, 20, 24, 32, 32];
12const DEC_L2: &[u16] = &[
13    0xa000, 0xc000, 0xd000, 0xe000, 0xea00, 0xee00, 0xf000, 0xf200, 0xf240, 0xffff,
14];
15const POS_L2: &[u16] = &[0, 0, 0, 0, 5, 7, 9, 13, 18, 22, 26, 34, 36];
16const DEC_HF0: &[u16] = &[
17    0x8000, 0xc000, 0xe000, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xffff,
18];
19const POS_HF0: &[u16] = &[0, 0, 0, 0, 0, 8, 16, 24, 33, 33, 33, 33, 33];
20const DEC_HF1: &[u16] = &[
21    0x2000, 0xc000, 0xe000, 0xf000, 0xf200, 0xf200, 0xf7e0, 0xffff,
22];
23const POS_HF1: &[u16] = &[0, 0, 0, 0, 0, 0, 4, 44, 60, 76, 80, 80, 127];
24const DEC_HF2: &[u16] = &[
25    0x1000, 0x2400, 0x8000, 0xc000, 0xfa00, 0xffff, 0xffff, 0xffff,
26];
27const POS_HF2: &[u16] = &[0, 0, 0, 0, 0, 0, 2, 7, 53, 117, 233, 0, 0];
28const DEC_HF3: &[u16] = &[0x0800, 0x2400, 0xee00, 0xfe80, 0xffff, 0xffff, 0xffff];
29const POS_HF3: &[u16] = &[0, 0, 0, 0, 0, 0, 0, 2, 16, 218, 251, 0, 0];
30const DEC_HF4: &[u16] = &[0xff00, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff];
31const POS_HF4: &[u16] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0];
32
33const SHORT_LEN1: [u8; 16] = [1, 3, 4, 4, 5, 6, 7, 8, 8, 4, 4, 5, 6, 6, 4, 0];
34const SHORT_XOR1: [u8; 15] = [
35    0x00, 0xa0, 0xd0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff, 0xc0, 0x80, 0x90, 0x98, 0x9c, 0xb0,
36];
37const SHORT_LEN2: [u8; 16] = [2, 3, 3, 3, 4, 4, 5, 6, 6, 4, 4, 5, 6, 6, 4, 0];
38const SHORT_XOR2: [u8; 15] = [
39    0x00, 0x40, 0x60, 0xa0, 0xd0, 0xe0, 0xf0, 0xf8, 0xfc, 0xc0, 0x80, 0x90, 0x98, 0x9c, 0xb0,
40];
41
42pub fn unpack15_encode(input: &[u8]) -> Result<Vec<u8>> {
43    unpack15_encode_with_options(input, EncodeOptions::default())
44}
45
46pub fn unpack15_encode_with_options(input: &[u8], options: EncodeOptions) -> Result<Vec<u8>> {
47    if input.is_empty() {
48        return Ok(Vec::new());
49    }
50
51    let mut encoder = Unpack15Encoder::with_options(options);
52    encoder.encode_member(input)
53}
54
55pub fn unpack15_decode(input: &[u8], output_size: usize) -> Result<Vec<u8>> {
56    let mut decoder = Unpack15::new();
57    decoder.decode_member(input, output_size, false)
58}
59
60pub struct Unpack15Encoder {
61    bits: BitWriter,
62    options: EncodeOptions,
63    // State names follow RAR13_FORMAT_SPECIFICATION.md ยง6 so the codec state
64    // lines up directly with the documented Unpack15 tables and traces.
65    ch_set: [u16; 256],
66    ch_set_c: [u16; 256],
67    ch_set_b: [u16; 256],
68    n_to_pl: [u8; 256],
69    n_to_pl_b: [u8; 256],
70    n_to_pl_c: [u8; 256],
71    ch_set_a: [u16; 256],
72    avr_plc: u32,
73    avr_plc_b: u32,
74    avr_ln1: u32,
75    avr_ln2: u32,
76    avr_ln3: u32,
77    max_dist3: u32,
78    nhfb: u32,
79    nlzb: u32,
80    num_huf: u32,
81    old_dist: [u32; 4],
82    old_dist_ptr: usize,
83    last_dist: u32,
84    last_length: u32,
85    l_count: u32,
86    #[cfg(test)]
87    stmode_literal_count: usize,
88}
89
90#[derive(Debug, Clone, Copy, PartialEq, Eq)]
91pub struct EncodeOptions {
92    old_distance_tokens: bool,
93    lazy_matching: bool,
94    stmode_literal_runs: bool,
95    max_long_match_distance: usize,
96}
97
98impl EncodeOptions {
99    pub const fn new() -> Self {
100        Self {
101            old_distance_tokens: true,
102            lazy_matching: true,
103            stmode_literal_runs: true,
104            max_long_match_distance: MAX_LONG_LZ_DISTANCE,
105        }
106    }
107
108    pub const fn with_old_distance_tokens(mut self, enabled: bool) -> Self {
109        self.old_distance_tokens = enabled;
110        self
111    }
112
113    pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
114        self.lazy_matching = enabled;
115        self
116    }
117
118    pub const fn with_stmode_literal_runs(mut self, enabled: bool) -> Self {
119        self.stmode_literal_runs = enabled;
120        self
121    }
122
123    pub const fn with_max_long_match_distance(mut self, distance: usize) -> Self {
124        self.max_long_match_distance = distance;
125        self
126    }
127
128    pub const fn old_distance_tokens_enabled(self) -> bool {
129        self.old_distance_tokens
130    }
131}
132
133impl Default for EncodeOptions {
134    fn default() -> Self {
135        Self::new()
136    }
137}
138
139impl Unpack15Encoder {
140    pub fn new() -> Self {
141        Self::with_options(EncodeOptions::default())
142    }
143
144    pub fn with_options(options: EncodeOptions) -> Self {
145        let mut encoder = Self {
146            bits: BitWriter::new(),
147            options,
148            ch_set: [0; 256],
149            ch_set_c: [0; 256],
150            ch_set_b: [0; 256],
151            n_to_pl: [0; 256],
152            n_to_pl_b: [0; 256],
153            n_to_pl_c: [0; 256],
154            ch_set_a: [0; 256],
155            avr_plc: 0x3500,
156            avr_plc_b: 0,
157            avr_ln1: 0,
158            avr_ln2: 0,
159            avr_ln3: 0,
160            max_dist3: 0x2001,
161            nhfb: 0x80,
162            nlzb: 0x80,
163            num_huf: 0,
164            old_dist: [u32::MAX; 4],
165            old_dist_ptr: 0,
166            last_dist: u32::MAX,
167            last_length: 0,
168            l_count: 0,
169            #[cfg(test)]
170            stmode_literal_count: 0,
171        };
172        encoder.init_huff();
173        encoder
174    }
175
176    pub fn encode_literals_only(mut self, input: &[u8]) -> Result<Vec<u8>> {
177        self.encode_literals_only_member(input)
178    }
179
180    fn encode_literals_only_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
181        if input.is_empty() {
182            return Ok(Vec::new());
183        }
184        self.bits = BitWriter::new();
185        let mut pos = 0usize;
186        while pos < input.len() {
187            let mut flags = 0u8;
188            let mut flag_bits = 0usize;
189            let mut payloads = Vec::new();
190            let mut plan_nhfb = self.nhfb;
191            let mut plan_nlzb = self.nlzb;
192            let mut plan_num_huf = self.num_huf;
193            let mut group_enters_stmode = false;
194
195            while flag_bits < 8 && pos < input.len() {
196                let flag = huff_flag_bits(plan_nlzb <= plan_nhfb);
197                if flag_bits + flag.len() > 8 {
198                    break;
199                }
200                write_planned_flag_bits(&mut flags, flag_bits, flag);
201                payloads.push(EncodedToken::Literal(input[pos]));
202                flag_bits += flag.len();
203                if flag_bits == 8 && plan_num_huf >= 16 && pos + 1 < input.len() {
204                    group_enters_stmode = true;
205                }
206                plan_num_huf += 1;
207                pos += 1;
208                plan_huff_effect(&mut plan_nhfb, &mut plan_nlzb);
209            }
210
211            self.emit_flags_byte(flags)?;
212            self.emit_payloads(payloads)?;
213            if group_enters_stmode {
214                if self.options.stmode_literal_runs {
215                    self.emit_stmode_literal_run(input, &[], &mut pos, false)?;
216                }
217                self.emit_stmode_exit()?;
218            }
219        }
220        Ok(std::mem::take(&mut self.bits).finish())
221    }
222
223    pub fn encode_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
224        if input.is_empty() {
225            return Ok(Vec::new());
226        }
227        self.bits = BitWriter::new();
228        let buckets = long_lz_buckets(input);
229        let mut pos = 0usize;
230        while pos < input.len() {
231            let mut flags = 0u8;
232            let mut flag_bits = 0usize;
233            let mut payloads = Vec::new();
234            let mut plan_encoder = self.clone_for_planning();
235            let mut group_enters_stmode = false;
236
237            while flag_bits < 8 && pos < input.len() {
238                let state = plan_encoder.lz_plan_state();
239                if let Some(token) = plan_encoder
240                    .choose_lz_token(input, pos, &buckets, state, flag_bits)
241                    .filter(|token| {
242                        !self.options.lazy_matching
243                            || !should_lazy_emit_literal(
244                                input,
245                                pos,
246                                &buckets,
247                                *token,
248                                state.max_dist3,
249                                self.options,
250                            )
251                    })
252                {
253                    let flag = token.flag_bits(state.nlzb, state.nhfb);
254                    let next_pos = pos + token.length() as usize;
255                    let leaves_unfillable_flag_bit =
256                        flag_bits + flag.len() == 7 && next_pos < input.len();
257                    if flag_bits + flag.len() <= 8 && !leaves_unfillable_flag_bit {
258                        write_planned_flag_bits(&mut flags, flag_bits, flag);
259                        flag_bits += flag.len();
260                        pos = next_pos;
261                        plan_encoder.emit_payloads(vec![token])?;
262                        payloads.push(token);
263                        continue;
264                    }
265                }
266
267                let flag = huff_flag_bits(plan_encoder.nlzb <= plan_encoder.nhfb);
268                if flag_bits + flag.len() > 8 {
269                    break;
270                }
271                write_planned_flag_bits(&mut flags, flag_bits, flag);
272                let literal = input[pos];
273                payloads.push(EncodedToken::Literal(input[pos]));
274                flag_bits += flag.len();
275                if flag_bits == 8 && plan_encoder.num_huf >= 16 && pos + 1 < input.len() {
276                    group_enters_stmode = true;
277                }
278                pos += 1;
279                plan_encoder.emit_literal(literal)?;
280            }
281
282            self.emit_flags_byte(flags)?;
283            self.emit_payloads(payloads)?;
284            if group_enters_stmode {
285                if self.options.stmode_literal_runs {
286                    self.emit_stmode_literal_run(input, &buckets, &mut pos, true)?;
287                }
288                self.emit_stmode_exit()?;
289            }
290        }
291        Ok(std::mem::take(&mut self.bits).finish())
292    }
293
294    fn clone_for_planning(&self) -> Self {
295        Self {
296            bits: BitWriter::new(),
297            options: self.options,
298            ch_set: self.ch_set,
299            ch_set_c: self.ch_set_c,
300            ch_set_b: self.ch_set_b,
301            n_to_pl: self.n_to_pl,
302            n_to_pl_b: self.n_to_pl_b,
303            n_to_pl_c: self.n_to_pl_c,
304            ch_set_a: self.ch_set_a,
305            avr_plc: self.avr_plc,
306            avr_plc_b: self.avr_plc_b,
307            avr_ln1: self.avr_ln1,
308            avr_ln2: self.avr_ln2,
309            avr_ln3: self.avr_ln3,
310            max_dist3: self.max_dist3,
311            nhfb: self.nhfb,
312            nlzb: self.nlzb,
313            num_huf: self.num_huf,
314            old_dist: self.old_dist,
315            old_dist_ptr: self.old_dist_ptr,
316            last_dist: self.last_dist,
317            last_length: self.last_length,
318            l_count: self.l_count,
319            #[cfg(test)]
320            stmode_literal_count: self.stmode_literal_count,
321        }
322    }
323
324    fn lz_plan_state(&self) -> LzPlanState {
325        LzPlanState {
326            last_dist: self.last_dist,
327            last_length: self.last_length,
328            old_dist: self.old_dist,
329            old_dist_ptr: self.old_dist_ptr,
330            max_dist3: self.max_dist3,
331            nlzb: self.nlzb,
332            nhfb: self.nhfb,
333            l_count: self.l_count,
334        }
335    }
336
337    fn choose_lz_token(
338        &self,
339        input: &[u8],
340        pos: usize,
341        buckets: &[Vec<usize>],
342        state: LzPlanState,
343        flag_bits: usize,
344    ) -> Option<EncodedToken> {
345        let candidates = find_lz_tokens(input, pos, buckets, state, self.options);
346        candidates
347            .into_iter()
348            .filter(|token| {
349                let flag_len = token.flag_bits(state.nlzb, state.nhfb).len();
350                let next_pos = pos + token.length() as usize;
351                next_pos == input.len()
352                    || (flag_bits + flag_len != 7
353                        && !(flag_len == 1 && flag_bits.is_multiple_of(2)))
354            })
355            .filter_map(|token| self.token_bit_cost(token, state).map(|cost| (token, cost)))
356            .min_by(|(left, left_cost), (right, right_cost)| {
357                let left_score = left_cost * 256 / left.length() as usize;
358                let right_score = right_cost * 256 / right.length() as usize;
359                left_score
360                    .cmp(&right_score)
361                    .then_with(|| right.length().cmp(&left.length()))
362            })
363            .map(|(token, _)| token)
364    }
365
366    fn token_bit_cost(&self, token: EncodedToken, state: LzPlanState) -> Option<usize> {
367        let flag_cost = token.flag_bits(state.nlzb, state.nhfb).len();
368        match token {
369            EncodedToken::Literal(byte) => {
370                let place = self
371                    .ch_set
372                    .iter()
373                    .position(|&value| (value >> 8) as u8 == byte)?;
374                Some(flag_cost + self.literal_place_bit_cost(place)?)
375            }
376            EncodedToken::RepeatLast(_) => {
377                Some(flag_cost + self.repeat_last_bit_cost(state.l_count))
378            }
379            EncodedToken::ShortLz(token) => {
380                let distance_value = token.distance.checked_sub(1)?;
381                let distance_place = self
382                    .ch_set_a
383                    .iter()
384                    .position(|&value| value as u32 == distance_value)?;
385                Some(
386                    flag_cost
387                        + l_count_break_bit_cost(state.l_count)
388                        + self.short_lz_prefix_bit_cost(token.length - 2)?
389                        + decode_num_bit_cost(distance_place as u32, 5, DEC_HF2, POS_HF2)?,
390                )
391            }
392            EncodedToken::OldDist(token) => {
393                let length_code = old_dist_lz_length_code(
394                    token.length,
395                    token.distance,
396                    state.max_dist3,
397                    token.short_code,
398                )?;
399                Some(
400                    flag_cost
401                        + l_count_break_bit_cost(state.l_count)
402                        + self.short_lz_prefix_bit_cost(token.short_code)?
403                        + decode_num_bit_cost(length_code, 2, DEC_L1, POS_L1)?,
404                )
405            }
406            EncodedToken::LongLz(token) => {
407                let length_code = long_lz_length_code_for_distance(token, state.max_dist3)?;
408                let distance_place = self.long_lz_distance_place(token.distance).ok()?;
409                Some(
410                    flag_cost
411                        + self.long_lz_length_bit_cost(length_code)?
412                        + self.long_lz_distance_bit_cost(distance_place)?
413                        + 7,
414                )
415            }
416        }
417    }
418
419    fn emit_payloads(&mut self, payloads: Vec<EncodedToken>) -> Result<()> {
420        for payload in payloads {
421            match payload {
422                EncodedToken::Literal(byte) => self.emit_literal(byte)?,
423                EncodedToken::ShortLz(short_lz) => {
424                    self.emit_short_lz(short_lz)?;
425                }
426                EncodedToken::RepeatLast(repeat) => {
427                    self.emit_repeat_last(repeat)?;
428                }
429                EncodedToken::OldDist(old_lz) => {
430                    self.emit_old_dist_lz(old_lz)?;
431                }
432                EncodedToken::LongLz(long_lz) => {
433                    self.emit_long_lz(long_lz)?;
434                }
435            }
436        }
437
438        Ok(())
439    }
440
441    fn emit_flags_byte(&mut self, flags: u8) -> Result<()> {
442        let flags_place = self
443            .ch_set_c
444            .iter()
445            .position(|&value| (value >> 8) as u8 == flags)
446            .ok_or(Error::InvalidData("RAR 1.3 flag byte is not encodable"))?;
447        emit_decode_num(&mut self.bits, flags_place as u32, 5, DEC_HF2, POS_HF2)?;
448
449        let mut cur_flags;
450        let mut new_flags_place;
451        loop {
452            cur_flags = self.ch_set_c[flags_place] as u32;
453            new_flags_place = self.n_to_pl_c[(cur_flags & 0xff) as usize] as usize;
454            self.n_to_pl_c[(cur_flags & 0xff) as usize] =
455                self.n_to_pl_c[(cur_flags & 0xff) as usize].wrapping_add(1);
456            cur_flags += 1;
457            if cur_flags & 0xff == 0 {
458                corr_huff(&mut self.ch_set_c, &mut self.n_to_pl_c);
459            } else {
460                break;
461            }
462        }
463
464        self.ch_set_c[flags_place] = self.ch_set_c[new_flags_place];
465        self.ch_set_c[new_flags_place] = cur_flags as u16;
466        Ok(())
467    }
468
469    fn emit_literal(&mut self, byte: u8) -> Result<()> {
470        let byte_place = self
471            .ch_set
472            .iter()
473            .position(|&value| (value >> 8) as u8 == byte)
474            .ok_or(Error::InvalidData("RAR 1.3 literal is not encodable"))?;
475        self.emit_literal_place(byte_place, byte_place, true)
476    }
477
478    fn emit_stmode_literal(&mut self, byte: u8) -> Result<()> {
479        let byte_place = self
480            .ch_set
481            .iter()
482            .position(|&value| (value >> 8) as u8 == byte)
483            .ok_or(Error::InvalidData(
484                "RAR 1.3 stmode literal is not encodable",
485            ))?;
486        #[cfg(test)]
487        {
488            self.stmode_literal_count += 1;
489        }
490        self.emit_literal_place(byte_place + 1, byte_place, false)
491    }
492
493    fn emit_stmode_literal_run(
494        &mut self,
495        input: &[u8],
496        buckets: &[Vec<usize>],
497        pos: &mut usize,
498        allow_lz: bool,
499    ) -> Result<()> {
500        while *pos + 1 < input.len() {
501            if allow_lz
502                && find_lz_token(
503                    input,
504                    *pos,
505                    buckets,
506                    LzPlanState {
507                        last_dist: self.last_dist,
508                        last_length: self.last_length,
509                        old_dist: self.old_dist,
510                        old_dist_ptr: self.old_dist_ptr,
511                        max_dist3: self.max_dist3,
512                        nlzb: self.nlzb,
513                        nhfb: self.nhfb,
514                        l_count: self.l_count,
515                    },
516                    self.options,
517                )
518                .is_some()
519            {
520                break;
521            }
522            self.emit_stmode_literal(input[*pos])?;
523            *pos += 1;
524        }
525        Ok(())
526    }
527
528    fn emit_literal_place(
529        &mut self,
530        encoded_place: usize,
531        decoded_place: usize,
532        update_num_huf: bool,
533    ) -> Result<()> {
534        if encoded_place > self.ch_set.len() || decoded_place >= self.ch_set.len() {
535            return Err(Error::InvalidData("RAR 1.3 literal is not encodable"));
536        }
537
538        let (start_pos, dec_tab, pos_tab) = if self.avr_plc > 0x75ff {
539            (8, DEC_HF4, POS_HF4)
540        } else if self.avr_plc > 0x5dff {
541            (6, DEC_HF3, POS_HF3)
542        } else if self.avr_plc > 0x35ff {
543            (5, DEC_HF2, POS_HF2)
544        } else if self.avr_plc > 0x0dff {
545            (5, DEC_HF1, POS_HF1)
546        } else {
547            (4, DEC_HF0, POS_HF0)
548        };
549        emit_decode_num(
550            &mut self.bits,
551            encoded_place as u32,
552            start_pos,
553            dec_tab,
554            pos_tab,
555        )?;
556
557        self.avr_plc += decoded_place as u32;
558        self.avr_plc -= self.avr_plc >> 8;
559        self.nhfb += 16;
560        if self.nhfb > 0xff {
561            self.nhfb = 0x90;
562            self.nlzb >>= 1;
563        }
564        if update_num_huf {
565            self.num_huf += 1;
566        }
567
568        let idx = decoded_place;
569        let mut cur_byte;
570        let mut new_byte_place;
571        loop {
572            cur_byte = self.ch_set[idx] as u32;
573            new_byte_place = self.n_to_pl[(cur_byte & 0xff) as usize] as usize;
574            self.n_to_pl[(cur_byte & 0xff) as usize] =
575                self.n_to_pl[(cur_byte & 0xff) as usize].wrapping_add(1);
576            cur_byte += 1;
577            if cur_byte & 0xff > 0xa1 {
578                corr_huff(&mut self.ch_set, &mut self.n_to_pl);
579            } else {
580                break;
581            }
582        }
583
584        self.ch_set[idx] = self.ch_set[new_byte_place];
585        self.ch_set[new_byte_place] = cur_byte as u16;
586        Ok(())
587    }
588
589    fn emit_short_lz(&mut self, short_lz: ShortLz) -> Result<()> {
590        self.num_huf = 0;
591        if self.l_count == 2 {
592            self.bits.write_bits(0, 1);
593            self.l_count = 0;
594        }
595        let length_place = short_lz.length - 2;
596        self.emit_short_lz_code(length_place as usize)?;
597        self.l_count = 0;
598
599        self.avr_ln1 += length_place;
600        self.avr_ln1 -= self.avr_ln1 >> 4;
601
602        let distance_value = short_lz.distance - 1;
603        let distance_place = self
604            .ch_set_a
605            .iter()
606            .position(|&value| value as u32 == distance_value)
607            .ok_or(Error::InvalidData(
608                "RAR 1.3 ShortLZ distance is not encodable",
609            ))?;
610        emit_decode_num(&mut self.bits, distance_place as u32, 5, DEC_HF2, POS_HF2)?;
611        if distance_place > 0 {
612            let last_distance = self.ch_set_a[distance_place - 1];
613            self.ch_set_a[distance_place] = last_distance;
614            self.ch_set_a[distance_place - 1] = distance_value as u16;
615        }
616        self.remember_match(short_lz.distance, short_lz.length);
617        Ok(())
618    }
619
620    fn emit_repeat_last(&mut self, repeat: RepeatLastLz) -> Result<()> {
621        if self.last_dist != repeat.distance || self.last_length != repeat.length {
622            return Err(Error::InvalidData(
623                "RAR 1.3 repeat-last state is not encodable",
624            ));
625        }
626        self.num_huf = 0;
627        if self.l_count == 2 {
628            self.bits.write_bits(1, 1);
629        } else {
630            self.emit_short_lz_code(9)?;
631            self.l_count += 1;
632        }
633        Ok(())
634    }
635
636    fn emit_old_dist_lz(&mut self, old_lz: OldDistLz) -> Result<()> {
637        self.num_huf = 0;
638        if self.l_count == 2 {
639            self.bits.write_bits(0, 1);
640            self.l_count = 0;
641        }
642        self.emit_short_lz_code(old_lz.short_code as usize)?;
643        self.l_count = 0;
644
645        let expected_distance = self.old_dist[(self
646            .old_dist_ptr
647            .wrapping_sub((old_lz.short_code - 9) as usize))
648            & 3];
649        if expected_distance != old_lz.distance {
650            return Err(Error::InvalidData(
651                "RAR 1.3 old-distance state is not encodable",
652            ));
653        }
654        let length_code = old_dist_lz_length_code(
655            old_lz.length,
656            old_lz.distance,
657            self.max_dist3,
658            old_lz.short_code,
659        )
660        .ok_or(Error::InvalidData(
661            "RAR 1.3 old-distance length is not encodable",
662        ))?;
663        emit_decode_num(&mut self.bits, length_code, 2, DEC_L1, POS_L1)?;
664        self.remember_match(old_lz.distance, old_lz.length);
665        Ok(())
666    }
667
668    fn emit_short_lz_code(&mut self, code: usize) -> Result<()> {
669        let (code_len, code_byte) = if self.avr_ln1 < 37 {
670            (self.short_len1(code), SHORT_XOR1[code])
671        } else {
672            (self.short_len2(code), SHORT_XOR2[code])
673        };
674        self.bits
675            .write_bits((code_byte >> (8 - code_len)) as u32, code_len as usize);
676        Ok(())
677    }
678
679    fn short_lz_prefix_bit_cost(&self, code: u32) -> Option<usize> {
680        let code = usize::try_from(code).ok()?;
681        if code >= SHORT_XOR1.len() {
682            return None;
683        }
684        Some(if self.avr_ln1 < 37 {
685            self.short_len1(code)
686        } else {
687            self.short_len2(code)
688        } as usize)
689    }
690
691    fn repeat_last_bit_cost(&self, l_count: u32) -> usize {
692        if l_count == 2 {
693            1
694        } else {
695            self.short_lz_prefix_bit_cost(9)
696                .expect("repeat-last code is encodable")
697        }
698    }
699
700    fn emit_long_lz(&mut self, long_lz: LongLz) -> Result<()> {
701        self.num_huf = 0;
702        self.nlzb += 16;
703        if self.nlzb > 0xff {
704            self.nlzb = 0x90;
705            self.nhfb >>= 1;
706        }
707        let old_avr2 = self.avr_ln2;
708
709        let length_code = self.long_lz_length_code(long_lz).ok_or(Error::InvalidData(
710            "RAR 1.3 LongLZ match length is not encodable for distance",
711        ))?;
712        emit_long_lz_length(&mut self.bits, self.avr_ln2, length_code)?;
713        self.avr_ln2 += length_code;
714        self.avr_ln2 -= self.avr_ln2 >> 5;
715
716        let distance_place = self.long_lz_distance_place(long_lz.distance)?;
717        let (start_pos, dec_tab, pos_tab) = if self.avr_plc_b > 0x28ff {
718            (5, DEC_HF2, POS_HF2)
719        } else if self.avr_plc_b > 0x06ff {
720            (5, DEC_HF1, POS_HF1)
721        } else {
722            (4, DEC_HF0, POS_HF0)
723        };
724        emit_decode_num(
725            &mut self.bits,
726            distance_place as u32,
727            start_pos,
728            dec_tab,
729            pos_tab,
730        )?;
731        self.avr_plc_b += distance_place as u32;
732        self.avr_plc_b -= self.avr_plc_b >> 8;
733
734        let idx = distance_place;
735        let mut distance;
736        let mut new_distance_place;
737        loop {
738            distance = self.ch_set_b[idx] as u32;
739            new_distance_place = self.n_to_pl_b[(distance & 0xff) as usize] as usize;
740            self.n_to_pl_b[(distance & 0xff) as usize] =
741                self.n_to_pl_b[(distance & 0xff) as usize].wrapping_add(1);
742            distance += 1;
743            if distance & 0xff == 0 {
744                corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
745            } else {
746                break;
747            }
748        }
749
750        self.ch_set_b[idx] = self.ch_set_b[new_distance_place];
751        self.ch_set_b[new_distance_place] = distance as u16;
752
753        let low_byte = ((long_lz.distance << 1) & 0xff) as u8;
754        self.bits.write_bits((low_byte >> 1) as u32, 7);
755
756        let old_avr3 = self.avr_ln3;
757        if length_code != 1 && length_code != 4 {
758            if length_code == 0 && long_lz.distance <= self.max_dist3 {
759                self.avr_ln3 += 1;
760                self.avr_ln3 -= self.avr_ln3 >> 8;
761            } else if self.avr_ln3 > 0 {
762                self.avr_ln3 -= 1;
763            }
764        }
765        if old_avr3 > 0xb0 || (self.avr_plc >= 0x2a00 && old_avr2 < 0x40) {
766            self.max_dist3 = 0x7f00;
767        } else {
768            self.max_dist3 = 0x2001;
769        }
770
771        self.remember_match(long_lz.distance, long_lz.length);
772        Ok(())
773    }
774
775    fn long_lz_length_code(&self, long_lz: LongLz) -> Option<u32> {
776        long_lz_length_code_for_distance(long_lz, self.max_dist3)
777    }
778
779    fn long_lz_distance_place(&self, target_distance: u32) -> Result<usize> {
780        let wanted_high = ((target_distance << 1) & 0xff00) as u16;
781        self.ch_set_b
782            .iter()
783            .position(|&value| value & 0xff00 == wanted_high)
784            .ok_or(Error::InvalidData(
785                "RAR 1.3 LongLZ distance is not encodable",
786            ))
787    }
788
789    fn literal_place_bit_cost(&self, place: usize) -> Option<usize> {
790        if self.avr_plc > 0x75ff {
791            decode_num_bit_cost(place as u32, 8, DEC_HF4, POS_HF4)
792        } else if self.avr_plc > 0x5dff {
793            decode_num_bit_cost(place as u32, 6, DEC_HF3, POS_HF3)
794        } else if self.avr_plc > 0x35ff {
795            decode_num_bit_cost(place as u32, 5, DEC_HF2, POS_HF2)
796        } else if self.avr_plc > 0x0dff {
797            decode_num_bit_cost(place as u32, 5, DEC_HF1, POS_HF1)
798        } else {
799            decode_num_bit_cost(place as u32, 4, DEC_HF0, POS_HF0)
800        }
801    }
802
803    fn long_lz_length_bit_cost(&self, length_code: u32) -> Option<usize> {
804        if self.avr_ln2 >= 122 {
805            decode_num_bit_cost(length_code, 3, DEC_L2, POS_L2)
806        } else if self.avr_ln2 >= 64 {
807            decode_num_bit_cost(length_code, 2, DEC_L1, POS_L1)
808        } else if length_code <= 7 {
809            Some(length_code as usize + 1)
810        } else if length_code < 0x100 {
811            Some(16)
812        } else {
813            None
814        }
815    }
816
817    fn long_lz_distance_bit_cost(&self, distance_place: usize) -> Option<usize> {
818        if self.avr_plc_b > 0x28ff {
819            decode_num_bit_cost(distance_place as u32, 5, DEC_HF2, POS_HF2)
820        } else if self.avr_plc_b > 0x06ff {
821            decode_num_bit_cost(distance_place as u32, 5, DEC_HF1, POS_HF1)
822        } else {
823            decode_num_bit_cost(distance_place as u32, 4, DEC_HF0, POS_HF0)
824        }
825    }
826
827    fn emit_stmode_exit(&mut self) -> Result<()> {
828        let (start_pos, dec_tab, pos_tab) = if self.avr_plc > 0x75ff {
829            (8, DEC_HF4, POS_HF4)
830        } else if self.avr_plc > 0x5dff {
831            (6, DEC_HF3, POS_HF3)
832        } else if self.avr_plc > 0x35ff {
833            (5, DEC_HF2, POS_HF2)
834        } else if self.avr_plc > 0x0dff {
835            (5, DEC_HF1, POS_HF1)
836        } else {
837            (4, DEC_HF0, POS_HF0)
838        };
839        emit_decode_num(&mut self.bits, 0, start_pos, dec_tab, pos_tab)?;
840        self.bits.write_bits(1, 1);
841        self.num_huf = 0;
842        Ok(())
843    }
844
845    fn init_huff(&mut self) {
846        for i in 0..256 {
847            self.ch_set[i] = (i as u16) << 8;
848            self.ch_set_c[i] = (0u8.wrapping_sub(i as u8) as u16) << 8;
849            self.ch_set_b[i] = (i as u16) << 8;
850        }
851        self.n_to_pl = [0; 256];
852        self.n_to_pl_b = [0; 256];
853        self.n_to_pl_c = [0; 256];
854        for i in 0..256 {
855            self.ch_set_a[i] = i as u16;
856        }
857        corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
858    }
859
860    fn remember_match(&mut self, distance: u32, length: u32) {
861        self.old_dist[self.old_dist_ptr] = distance;
862        self.old_dist_ptr = (self.old_dist_ptr + 1) & 3;
863        self.last_length = length;
864        self.last_dist = distance;
865    }
866
867    fn short_len1(&self, pos: usize) -> u8 {
868        if pos == 1 {
869            3
870        } else {
871            SHORT_LEN1[pos]
872        }
873    }
874
875    fn short_len2(&self, pos: usize) -> u8 {
876        if pos == 3 {
877            3
878        } else {
879            SHORT_LEN2[pos]
880        }
881    }
882}
883
884impl Default for Unpack15Encoder {
885    fn default() -> Self {
886        Self::new()
887    }
888}
889
890#[derive(Debug, Clone, Copy)]
891struct LzPlanState {
892    last_dist: u32,
893    last_length: u32,
894    old_dist: [u32; 4],
895    old_dist_ptr: usize,
896    max_dist3: u32,
897    nlzb: u32,
898    nhfb: u32,
899    l_count: u32,
900}
901
902#[derive(Debug, Clone, Copy, PartialEq, Eq)]
903enum EncodedToken {
904    Literal(u8),
905    ShortLz(ShortLz),
906    RepeatLast(RepeatLastLz),
907    OldDist(OldDistLz),
908    LongLz(LongLz),
909}
910
911impl EncodedToken {
912    fn length(self) -> u32 {
913        match self {
914            Self::Literal(_) => 1,
915            Self::ShortLz(token) => token.length,
916            Self::RepeatLast(token) => token.length,
917            Self::OldDist(token) => token.length,
918            Self::LongLz(token) => token.length,
919        }
920    }
921
922    fn flag_bits(self, nlzb: u32, nhfb: u32) -> &'static [bool] {
923        match self {
924            Self::Literal(_) => huff_flag_bits(nlzb <= nhfb),
925            Self::LongLz(_) => long_lz_flag_bits(nlzb > nhfb),
926            Self::ShortLz(_) | Self::RepeatLast(_) | Self::OldDist(_) => &[false, false],
927        }
928    }
929}
930
931#[derive(Debug, Clone, Copy, PartialEq, Eq)]
932struct ShortLz {
933    pub distance: u32,
934    pub length: u32,
935}
936
937#[derive(Debug, Clone, Copy, PartialEq, Eq)]
938struct RepeatLastLz {
939    pub distance: u32,
940    pub length: u32,
941}
942
943#[derive(Debug, Clone, Copy, PartialEq, Eq)]
944struct OldDistLz {
945    pub distance: u32,
946    pub length: u32,
947    pub short_code: u32,
948}
949
950#[derive(Debug, Clone, Copy, PartialEq, Eq)]
951pub struct LongLz {
952    pub distance: u32,
953    pub length: u32,
954}
955
956fn huff_flag_bits(prefer_huff_on_one: bool) -> &'static [bool] {
957    if prefer_huff_on_one {
958        &[true]
959    } else {
960        &[false, true]
961    }
962}
963
964fn long_lz_flag_bits(prefer_long_lz_on_one: bool) -> &'static [bool] {
965    if prefer_long_lz_on_one {
966        &[true]
967    } else {
968        &[false, true]
969    }
970}
971
972fn write_planned_flag_bits(flags: &mut u8, start: usize, bits: &[bool]) {
973    for (offset, &bit) in bits.iter().enumerate() {
974        if bit {
975            *flags |= 1 << (7 - start - offset);
976        }
977    }
978}
979
980fn plan_huff_effect(nhfb: &mut u32, nlzb: &mut u32) {
981    *nhfb += 16;
982    if *nhfb > 0xff {
983        *nhfb = 0x90;
984        *nlzb >>= 1;
985    }
986}
987
988fn l_count_break_bit_cost(l_count: u32) -> usize {
989    usize::from(l_count == 2)
990}
991
992fn find_lz_token(
993    input: &[u8],
994    pos: usize,
995    buckets: &[Vec<usize>],
996    state: LzPlanState,
997    options: EncodeOptions,
998) -> Option<EncodedToken> {
999    find_lz_tokens(input, pos, buckets, state, options)
1000        .into_iter()
1001        .next()
1002}
1003
1004fn find_lz_tokens(
1005    input: &[u8],
1006    pos: usize,
1007    buckets: &[Vec<usize>],
1008    state: LzPlanState,
1009    options: EncodeOptions,
1010) -> Vec<EncodedToken> {
1011    let mut tokens = Vec::with_capacity(4);
1012    if let Some(repeat) = find_repeat_last_lz(input, pos, state.last_dist, state.last_length) {
1013        tokens.push(EncodedToken::RepeatLast(repeat));
1014    }
1015    if options.old_distance_tokens {
1016        if let Some(old_lz) = find_old_dist_lz(
1017            input,
1018            pos,
1019            state.old_dist,
1020            state.old_dist_ptr,
1021            state.max_dist3,
1022        ) {
1023            tokens.push(EncodedToken::OldDist(old_lz));
1024        }
1025    }
1026    if let Some(short_lz) = find_short_lz(input, pos) {
1027        tokens.push(EncodedToken::ShortLz(short_lz));
1028    }
1029    if let Some(long_lz) = find_long_lz_with_buckets(
1030        input,
1031        pos,
1032        options.max_long_match_distance,
1033        buckets,
1034        MAX_LONG_MATCH_CANDIDATES,
1035    )
1036    .filter(|long_lz| long_lz_length_code_for_distance(*long_lz, state.max_dist3).is_some())
1037    {
1038        tokens.push(EncodedToken::LongLz(long_lz));
1039    }
1040    tokens
1041}
1042
1043fn should_lazy_emit_literal(
1044    input: &[u8],
1045    pos: usize,
1046    buckets: &[Vec<usize>],
1047    current: EncodedToken,
1048    max_dist3: u32,
1049    options: EncodeOptions,
1050) -> bool {
1051    if !matches!(current, EncodedToken::ShortLz(_) | EncodedToken::LongLz(_))
1052        || pos + 1 >= input.len()
1053    {
1054        return false;
1055    }
1056
1057    let next = find_lz_token(
1058        input,
1059        pos + 1,
1060        buckets,
1061        LzPlanState {
1062            last_dist: u32::MAX,
1063            last_length: 0,
1064            old_dist: [u32::MAX; 4],
1065            old_dist_ptr: 0,
1066            max_dist3,
1067            nlzb: 0,
1068            nhfb: 0,
1069            l_count: 0,
1070        },
1071        options,
1072    );
1073    next.is_some_and(|next| {
1074        matches!(next, EncodedToken::ShortLz(_) | EncodedToken::LongLz(_))
1075            && next.length() >= current.length() + 2
1076    })
1077}
1078
1079fn find_short_lz(input: &[u8], pos: usize) -> Option<ShortLz> {
1080    if pos < 2 {
1081        return None;
1082    }
1083
1084    let max_distance = pos.min(256);
1085    let mut best = ShortLz {
1086        distance: 0,
1087        length: 0,
1088    };
1089    for distance in 1..=max_distance {
1090        let mut length = 0usize;
1091        while length < 10
1092            && pos + length < input.len()
1093            && input[pos + length] == input[pos + length - distance]
1094        {
1095            length += 1;
1096        }
1097        if length >= 3
1098            && (length > best.length as usize
1099                || (length == best.length as usize && distance < best.distance as usize))
1100        {
1101            best = ShortLz {
1102                distance: distance as u32,
1103                length: length as u32,
1104            };
1105        }
1106    }
1107
1108    (best.length >= 3).then_some(best)
1109}
1110
1111fn find_repeat_last_lz(
1112    input: &[u8],
1113    pos: usize,
1114    last_dist: u32,
1115    last_length: u32,
1116) -> Option<RepeatLastLz> {
1117    if last_dist == u32::MAX || last_dist == 0 || last_length == 0 {
1118        return None;
1119    }
1120    let distance = usize::try_from(last_dist).ok()?;
1121    let length = usize::try_from(last_length).ok()?;
1122    if distance > pos || pos.checked_add(length)? > input.len() {
1123        return None;
1124    }
1125    let matches = (0..length).all(|offset| input[pos + offset] == input[pos + offset - distance]);
1126    matches.then_some(RepeatLastLz {
1127        distance: last_dist,
1128        length: last_length,
1129    })
1130}
1131
1132fn find_old_dist_lz(
1133    input: &[u8],
1134    pos: usize,
1135    old_dist: [u32; 4],
1136    old_dist_ptr: usize,
1137    _max_dist3: u32,
1138) -> Option<OldDistLz> {
1139    let mut best = OldDistLz {
1140        distance: 0,
1141        length: 0,
1142        short_code: 0,
1143    };
1144    for short_code in 10..=13 {
1145        let distance = old_dist[(old_dist_ptr.wrapping_sub((short_code - 9) as usize)) & 3];
1146        if distance == u32::MAX || distance == 0 {
1147            continue;
1148        }
1149        let Ok(distance_usize) = usize::try_from(distance) else {
1150            continue;
1151        };
1152        if distance_usize > pos {
1153            continue;
1154        }
1155        let mut length = 0usize;
1156        while length < 258
1157            && pos + length < input.len()
1158            && input[pos + length] == input[pos + length - distance_usize]
1159        {
1160            length += 1;
1161        }
1162        if length >= 3
1163            && old_dist_lz_is_encodable(length as u32, distance, short_code)
1164            && length > best.length as usize
1165        {
1166            best = OldDistLz {
1167                distance,
1168                length: length as u32,
1169                short_code,
1170            };
1171        }
1172    }
1173
1174    (best.length >= 3).then_some(best)
1175}
1176
1177fn old_dist_lz_is_encodable(length: u32, distance: u32, short_code: u32) -> bool {
1178    old_dist_lz_length_code(length, distance, 0x2001, short_code).is_some()
1179        && old_dist_lz_length_code(length, distance, 0x7f00, short_code).is_some()
1180}
1181
1182fn old_dist_lz_length_code(
1183    length: u32,
1184    distance: u32,
1185    max_dist3: u32,
1186    short_code: u32,
1187) -> Option<u32> {
1188    let decoded_bonus = u32::from(distance > 256) + u32::from(distance >= max_dist3);
1189    let length_code = length.checked_sub(2 + decoded_bonus)?;
1190    if short_code == 10 && length_code == 0xff {
1191        return None;
1192    }
1193    Some(length_code)
1194}
1195
1196fn long_lz_length_code_for_distance(long_lz: LongLz, max_dist3: u32) -> Option<u32> {
1197    let decoded_bonus =
1198        u32::from(long_lz.distance >= max_dist3) + u32::from(long_lz.distance <= 256);
1199    long_lz.length.checked_sub(3 + decoded_bonus)
1200}
1201
1202pub fn find_long_lz(input: &[u8], pos: usize, max_match_distance: usize) -> Option<LongLz> {
1203    if pos < 257 {
1204        return None;
1205    }
1206
1207    let max_distance = pos.min(MAX_LONG_LZ_DISTANCE).min(max_match_distance);
1208    if max_distance < 257 {
1209        return None;
1210    }
1211    let mut best = LongLz {
1212        distance: 0,
1213        length: 0,
1214    };
1215    for distance in 257..=max_distance {
1216        let mut length = 0usize;
1217        while length < 258
1218            && pos + length < input.len()
1219            && input[pos + length] == input[pos + length - distance]
1220        {
1221            length += 1;
1222        }
1223        if length >= 3 && length > best.length as usize {
1224            best = LongLz {
1225                distance: distance as u32,
1226                length: length as u32,
1227            };
1228        }
1229    }
1230
1231    (best.length >= 3).then_some(best)
1232}
1233
1234fn find_long_lz_with_buckets(
1235    input: &[u8],
1236    pos: usize,
1237    max_match_distance: usize,
1238    buckets: &[Vec<usize>],
1239    max_candidates: usize,
1240) -> Option<LongLz> {
1241    if pos < 257 || pos + 2 >= input.len() || max_candidates == 0 {
1242        return None;
1243    }
1244
1245    let max_distance = pos.min(MAX_LONG_LZ_DISTANCE).min(max_match_distance);
1246    if max_distance < 257 {
1247        return None;
1248    }
1249    let max_length = (input.len() - pos).min(258);
1250    let mut best = LongLz {
1251        distance: 0,
1252        length: 0,
1253    };
1254    let mut checked = 0usize;
1255    for &candidate in buckets[match_hash(input, pos)].iter().rev() {
1256        if candidate >= pos {
1257            continue;
1258        }
1259        let distance = pos - candidate;
1260        if distance > max_distance {
1261            break;
1262        }
1263        if distance < 257 {
1264            continue;
1265        }
1266        checked += 1;
1267        let mut length = 0usize;
1268        while length < max_length && input[pos + length] == input[pos + length - distance] {
1269            length += 1;
1270        }
1271        if length >= 3
1272            && (length > best.length as usize
1273                || (length == best.length as usize && distance < best.distance as usize))
1274        {
1275            best = LongLz {
1276                distance: distance as u32,
1277                length: length as u32,
1278            };
1279            if length == max_length {
1280                break;
1281            }
1282        }
1283        if checked >= max_candidates {
1284            break;
1285        }
1286    }
1287
1288    (best.length >= 3).then_some(best)
1289}
1290
1291fn long_lz_buckets(input: &[u8]) -> Vec<Vec<usize>> {
1292    let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1293    for pos in 0..input.len().saturating_sub(2) {
1294        buckets[match_hash(input, pos)].push(pos);
1295    }
1296    buckets
1297}
1298
1299fn match_hash(input: &[u8], pos: usize) -> usize {
1300    let value =
1301        ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
1302    value & (MATCH_HASH_BUCKETS - 1)
1303}
1304
1305fn emit_long_lz_length(bits: &mut BitWriter, avr_ln2: u32, length_code: u32) -> Result<()> {
1306    if avr_ln2 >= 122 {
1307        return emit_decode_num(bits, length_code, 3, DEC_L2, POS_L2);
1308    }
1309    if avr_ln2 >= 64 {
1310        return emit_decode_num(bits, length_code, 2, DEC_L1, POS_L1);
1311    }
1312    if length_code <= 7 {
1313        bits.write_bits(1, length_code as usize + 1);
1314        return Ok(());
1315    }
1316    if length_code < 0x100 {
1317        bits.write_bits(length_code, 16);
1318        return Ok(());
1319    }
1320    Err(Error::InvalidData(
1321        "RAR 1.3 LongLZ encoder length is not encodable",
1322    ))
1323}
1324
1325fn emit_decode_num(
1326    bits: &mut BitWriter,
1327    target: u32,
1328    start_pos: u32,
1329    dec_tab: &[u16],
1330    pos_tab: &[u16],
1331) -> Result<()> {
1332    if let Some((code, len)) = encode_decode_num_prefix(target, start_pos, dec_tab, pos_tab) {
1333        bits.write_bits(code, len);
1334        return Ok(());
1335    }
1336    Err(Error::InvalidData(
1337        "RAR 1.3 DecodeNum value is not encodable",
1338    ))
1339}
1340
1341fn decode_num_bit_cost(
1342    target: u32,
1343    start_pos: u32,
1344    dec_tab: &[u16],
1345    pos_tab: &[u16],
1346) -> Option<usize> {
1347    encode_decode_num_prefix(target, start_pos, dec_tab, pos_tab).map(|(_, len)| len)
1348}
1349
1350fn encode_decode_num_prefix(
1351    target: u32,
1352    start_pos: u32,
1353    dec_tab: &[u16],
1354    pos_tab: &[u16],
1355) -> Option<(u32, usize)> {
1356    let end = 16.min(pos_tab.len().saturating_sub(1));
1357    for (len, &base) in pos_tab
1358        .iter()
1359        .enumerate()
1360        .take(end + 1)
1361        .skip(start_pos as usize)
1362    {
1363        let dec_index = len.checked_sub(start_pos as usize)?;
1364        let upper = u32::from(*dec_tab.get(dec_index)?);
1365        let previous = if dec_index == 0 {
1366            0
1367        } else {
1368            u32::from(dec_tab[dec_index - 1])
1369        };
1370        let max_num = upper.checked_sub(1)? & !0xf;
1371        if max_num < previous {
1372            continue;
1373        }
1374        let base = u32::from(base);
1375        let max_target = ((max_num - previous) >> (16 - len)) + base;
1376        if target >= base && target <= max_target {
1377            let num = previous + ((target - base) << (16 - len));
1378            return Some((num >> (16 - len), len));
1379        }
1380    }
1381    None
1382}
1383
1384#[cfg(test)]
1385fn decode_num_prefix_is_stable(
1386    code: u32,
1387    len: usize,
1388    target: u32,
1389    start_pos: u32,
1390    dec_tab: &[u16],
1391    pos_tab: &[u16],
1392) -> bool {
1393    let relevant_tail_bits = 16usize.saturating_sub(len + 4);
1394    for tail in 0..(1u32 << relevant_tail_bits) {
1395        let bit_field = (code << (16 - len)) | (tail << 4);
1396        let (decoded, consumed) = simulate_decode_num(bit_field, start_pos, dec_tab, pos_tab);
1397        if decoded != target || consumed != len {
1398            return false;
1399        }
1400    }
1401    true
1402}
1403
1404#[cfg(test)]
1405fn simulate_decode_num(
1406    bit_field: u32,
1407    mut start_pos: u32,
1408    dec_tab: &[u16],
1409    pos_tab: &[u16],
1410) -> (u32, usize) {
1411    let num = bit_field & 0xfff0;
1412    let mut i = 0usize;
1413    while dec_tab[i] as u32 <= num {
1414        start_pos += 1;
1415        i += 1;
1416    }
1417    (
1418        ((num - if i > 0 { dec_tab[i - 1] as u32 } else { 0 }) >> (16 - start_pos))
1419            + pos_tab[start_pos as usize] as u32,
1420        start_pos as usize,
1421    )
1422}
1423
1424#[derive(Clone)]
1425pub struct Unpack15 {
1426    bits: BitReader,
1427    target: usize,
1428    output_written: usize,
1429    window: [u8; 0x10000],
1430    unp_ptr: usize,
1431    prev_ptr: usize,
1432    first_win_done: bool,
1433    // State names follow RAR13_FORMAT_SPECIFICATION.md ยง6 so the codec state
1434    // lines up directly with the documented Unpack15 tables and traces.
1435    ch_set: [u16; 256],
1436    ch_set_a: [u16; 256],
1437    ch_set_b: [u16; 256],
1438    ch_set_c: [u16; 256],
1439    n_to_pl: [u8; 256],
1440    n_to_pl_b: [u8; 256],
1441    n_to_pl_c: [u8; 256],
1442    avr_plc: u32,
1443    avr_plc_b: u32,
1444    avr_ln1: u32,
1445    avr_ln2: u32,
1446    avr_ln3: u32,
1447    max_dist3: u32,
1448    nhfb: u32,
1449    nlzb: u32,
1450    num_huf: u32,
1451    buf60: u32,
1452    st_mode: bool,
1453    l_count: u32,
1454    flag_buf: u32,
1455    flags_cnt: i32,
1456    old_dist: [u32; 4],
1457    old_dist_ptr: usize,
1458    last_dist: u32,
1459    last_length: u32,
1460}
1461
1462impl Unpack15 {
1463    pub fn new() -> Self {
1464        Self {
1465            bits: BitReader::new(&[]),
1466            target: 0,
1467            output_written: 0,
1468            window: [0; 0x10000],
1469            unp_ptr: 0,
1470            prev_ptr: 0,
1471            first_win_done: false,
1472            ch_set: [0; 256],
1473            ch_set_a: [0; 256],
1474            ch_set_b: [0; 256],
1475            ch_set_c: [0; 256],
1476            n_to_pl: [0; 256],
1477            n_to_pl_b: [0; 256],
1478            n_to_pl_c: [0; 256],
1479            avr_plc: 0x3500,
1480            avr_plc_b: 0,
1481            avr_ln1: 0,
1482            avr_ln2: 0,
1483            avr_ln3: 0,
1484            max_dist3: 0x2001,
1485            nhfb: 0x80,
1486            nlzb: 0x80,
1487            num_huf: 0,
1488            buf60: 0,
1489            st_mode: false,
1490            l_count: 0,
1491            flag_buf: 0,
1492            flags_cnt: 0,
1493            old_dist: [u32::MAX; 4],
1494            old_dist_ptr: 0,
1495            last_dist: u32::MAX,
1496            last_length: 0,
1497        }
1498    }
1499
1500    pub fn decode_member(&mut self, input: &[u8], target: usize, solid: bool) -> Result<Vec<u8>> {
1501        let mut output = Vec::with_capacity(target);
1502        self.decode_member_to(input, target, solid, &mut output)?;
1503        Ok(output)
1504    }
1505
1506    pub fn decode_member_to(
1507        &mut self,
1508        input: &[u8],
1509        target: usize,
1510        solid: bool,
1511        out: &mut impl Write,
1512    ) -> Result<()> {
1513        self.init_member(target, solid);
1514        self.bits = BitReader::new_final(input);
1515        self.decode_loop(out).map_err(|error| match error {
1516            Error::NeedMoreInput => Error::InvalidData("RAR 1.3 bitstream is truncated"),
1517            error => error,
1518        })
1519    }
1520
1521    pub fn decode_member_from_reader(
1522        &mut self,
1523        input: &mut impl Read,
1524        target: usize,
1525        solid: bool,
1526        out: &mut impl Write,
1527    ) -> Result<()> {
1528        const INPUT_CHUNK: usize = 64 * 1024;
1529        const OUTPUT_CHUNK: usize = 64 * 1024;
1530
1531        self.init_member(target, solid);
1532        self.bits = BitReader::new(&[]);
1533        let mut input_done = false;
1534        let mut buffer = [0u8; INPUT_CHUNK];
1535
1536        while self.output_written < self.target {
1537            let chunk_target = self
1538                .output_written
1539                .saturating_add(OUTPUT_CHUNK)
1540                .min(self.target);
1541            loop {
1542                let checkpoint = self.clone();
1543                let mut chunk = Vec::with_capacity(chunk_target - self.output_written);
1544                match self.decode_loop_until(chunk_target, &mut chunk) {
1545                    Ok(()) => {
1546                        out.write_all(&chunk)
1547                            .map_err(|_| Error::InvalidData("RAR 1.3 output write failed"))?;
1548                        break;
1549                    }
1550                    Err(Error::NeedMoreInput) if !input_done => {
1551                        *self = checkpoint;
1552                        let read = input
1553                            .read(&mut buffer)
1554                            .map_err(|_| Error::InvalidData("RAR 1.3 input read failed"))?;
1555                        if read == 0 {
1556                            input_done = true;
1557                            self.bits.finish();
1558                        } else {
1559                            self.bits.append(&buffer[..read]);
1560                        }
1561                    }
1562                    Err(Error::NeedMoreInput) => {
1563                        return Err(Error::InvalidData("RAR 1.3 bitstream is truncated"));
1564                    }
1565                    Err(error) => return Err(error),
1566                }
1567            }
1568        }
1569        Ok(())
1570    }
1571
1572    fn init_member(&mut self, target: usize, solid: bool) {
1573        self.target = target;
1574        self.output_written = 0;
1575        self.flags_cnt = -2;
1576        self.flag_buf = 0;
1577        self.st_mode = false;
1578        self.l_count = 0;
1579
1580        if !solid {
1581            self.reset_non_solid();
1582        }
1583    }
1584
1585    fn decode_loop(&mut self, out: &mut impl Write) -> Result<()> {
1586        if self.target == 0 {
1587            return Ok(());
1588        }
1589
1590        self.decode_loop_until(self.target, out)
1591    }
1592
1593    fn decode_loop_until(&mut self, target: usize, out: &mut impl Write) -> Result<()> {
1594        while self.output_written < target {
1595            self.decode_step(out)?;
1596        }
1597
1598        Ok(())
1599    }
1600
1601    fn decode_step(&mut self, out: &mut impl Write) -> Result<()> {
1602        if self.flags_cnt == -2 {
1603            self.get_flags_buf()?;
1604            self.flags_cnt = 8;
1605        }
1606
1607        self.unp_ptr &= 0xffff;
1608        self.first_win_done |= self.prev_ptr > self.unp_ptr;
1609        self.prev_ptr = self.unp_ptr;
1610
1611        if self.st_mode {
1612            return self.huff_decode(out);
1613        }
1614
1615        self.flags_cnt -= 1;
1616        if self.flags_cnt < 0 {
1617            self.get_flags_buf()?;
1618            self.flags_cnt = 7;
1619        }
1620
1621        if self.flag_buf & 0x80 != 0 {
1622            self.flag_buf = (self.flag_buf << 1) & 0xff;
1623            if self.nlzb > self.nhfb {
1624                self.long_lz(out)
1625            } else {
1626                self.huff_decode(out)
1627            }
1628        } else {
1629            self.flag_buf = (self.flag_buf << 1) & 0xff;
1630            self.flags_cnt -= 1;
1631            if self.flags_cnt < 0 {
1632                self.get_flags_buf()?;
1633                self.flags_cnt = 7;
1634            }
1635            if self.flag_buf & 0x80 != 0 {
1636                self.flag_buf = (self.flag_buf << 1) & 0xff;
1637                if self.nlzb > self.nhfb {
1638                    self.huff_decode(out)
1639                } else {
1640                    self.long_lz(out)
1641                }
1642            } else {
1643                self.flag_buf = (self.flag_buf << 1) & 0xff;
1644                self.short_lz(out)
1645            }
1646        }
1647    }
1648
1649    fn reset_non_solid(&mut self) {
1650        self.window = [0; 0x10000];
1651        self.unp_ptr = 0;
1652        self.prev_ptr = 0;
1653        self.first_win_done = false;
1654        self.avr_plc_b = 0;
1655        self.avr_ln1 = 0;
1656        self.avr_ln2 = 0;
1657        self.avr_ln3 = 0;
1658        self.num_huf = 0;
1659        self.buf60 = 0;
1660        self.avr_plc = 0x3500;
1661        self.max_dist3 = 0x2001;
1662        self.nhfb = 0x80;
1663        self.nlzb = 0x80;
1664        self.old_dist = [u32::MAX; 4];
1665        self.old_dist_ptr = 0;
1666        self.last_dist = u32::MAX;
1667        self.last_length = 0;
1668        self.init_huff();
1669    }
1670
1671    fn short_lz(&mut self, out: &mut impl Write) -> Result<()> {
1672        self.num_huf = 0;
1673        let mut bit_field = self.bits.get_bits()?;
1674        if self.l_count == 2 {
1675            self.bits.add_bits(1);
1676            if bit_field >= 0x8000 {
1677                self.copy_string(self.last_dist, self.last_length, out)?;
1678                return Ok(());
1679            }
1680            bit_field = (bit_field << 1) & 0xffff;
1681            self.l_count = 0;
1682        }
1683
1684        let bit_byte = (bit_field >> 8) as u8;
1685        let mut length = 0usize;
1686        if self.avr_ln1 < 37 {
1687            while length < SHORT_XOR1.len() {
1688                let short_len = self.short_len1(length);
1689                let mask = (!(0xffu16 >> short_len)) as u8;
1690                if ((bit_byte ^ SHORT_XOR1[length]) & mask) == 0 {
1691                    break;
1692                }
1693                length += 1;
1694            }
1695            self.bits.add_bits(self.short_len1(length) as usize);
1696        } else {
1697            while length < SHORT_XOR2.len() {
1698                let short_len = self.short_len2(length);
1699                let mask = (!(0xffu16 >> short_len)) as u8;
1700                if ((bit_byte ^ SHORT_XOR2[length]) & mask) == 0 {
1701                    break;
1702                }
1703                length += 1;
1704            }
1705            self.bits.add_bits(self.short_len2(length) as usize);
1706        }
1707
1708        let mut length = length as u32;
1709        if length >= 9 {
1710            if length == 9 {
1711                self.l_count += 1;
1712                self.copy_string(self.last_dist, self.last_length, out)?;
1713                return Ok(());
1714            }
1715            if length == 14 {
1716                self.l_count = 0;
1717                length = self.decode_num(self.bits.get_bits()?, 3, DEC_L2, POS_L2) + 5;
1718                let distance = (self.bits.get_bits()? >> 1) | 0x8000;
1719                self.bits.add_bits(15);
1720                self.last_length = length;
1721                self.last_dist = distance;
1722                self.copy_string(distance, length, out)?;
1723                return Ok(());
1724            }
1725
1726            self.l_count = 0;
1727            let save_length = length;
1728            let distance =
1729                self.old_dist[(self.old_dist_ptr.wrapping_sub((length - 9) as usize)) & 3];
1730            length = self.decode_num(self.bits.get_bits()?, 2, DEC_L1, POS_L1) + 2;
1731            if length == 0x101 && save_length == 10 {
1732                self.buf60 ^= 1;
1733                return Ok(());
1734            }
1735            if distance > 256 {
1736                length += 1;
1737            }
1738            if distance >= self.max_dist3 {
1739                length += 1;
1740            }
1741
1742            self.remember_match(distance, length);
1743            self.copy_string(distance, length, out)?;
1744            return Ok(());
1745        }
1746
1747        self.l_count = 0;
1748        self.avr_ln1 += length;
1749        self.avr_ln1 -= self.avr_ln1 >> 4;
1750
1751        let distance_place =
1752            (self.decode_num(self.bits.get_bits()?, 5, DEC_HF2, POS_HF2) & 0xff) as usize;
1753        let mut distance = self.ch_set_a[distance_place] as u32;
1754        if distance_place > 0 {
1755            let last_distance = self.ch_set_a[distance_place - 1];
1756            self.ch_set_a[distance_place] = last_distance;
1757            self.ch_set_a[distance_place - 1] = distance as u16;
1758        }
1759        length += 2;
1760        distance += 1;
1761        self.remember_match(distance, length);
1762        self.copy_string(distance, length, out)
1763    }
1764
1765    fn long_lz(&mut self, out: &mut impl Write) -> Result<()> {
1766        self.num_huf = 0;
1767        self.nlzb += 16;
1768        if self.nlzb > 0xff {
1769            self.nlzb = 0x90;
1770            self.nhfb >>= 1;
1771        }
1772        let old_avr2 = self.avr_ln2;
1773
1774        let bit_field = self.bits.get_bits()?;
1775        let mut length = if self.avr_ln2 >= 122 {
1776            self.decode_num(bit_field, 3, DEC_L2, POS_L2)
1777        } else if self.avr_ln2 >= 64 {
1778            self.decode_num(bit_field, 2, DEC_L1, POS_L1)
1779        } else if bit_field < 0x100 {
1780            self.bits.add_bits(16);
1781            bit_field
1782        } else {
1783            let mut length = 0u32;
1784            while ((bit_field << length) & 0x8000) == 0 {
1785                length += 1;
1786            }
1787            self.bits.add_bits((length + 1) as usize);
1788            length
1789        };
1790
1791        self.avr_ln2 += length;
1792        self.avr_ln2 -= self.avr_ln2 >> 5;
1793
1794        let bit_field = self.bits.get_bits()?;
1795        let distance_place = if self.avr_plc_b > 0x28ff {
1796            self.decode_num(bit_field, 5, DEC_HF2, POS_HF2)
1797        } else if self.avr_plc_b > 0x06ff {
1798            self.decode_num(bit_field, 5, DEC_HF1, POS_HF1)
1799        } else {
1800            self.decode_num(bit_field, 4, DEC_HF0, POS_HF0)
1801        };
1802
1803        self.avr_plc_b += distance_place;
1804        self.avr_plc_b -= self.avr_plc_b >> 8;
1805
1806        let idx = (distance_place & 0xff) as usize;
1807        let mut distance;
1808        let mut new_distance_place;
1809        loop {
1810            distance = self.ch_set_b[idx] as u32;
1811            new_distance_place = self.n_to_pl_b[(distance & 0xff) as usize] as usize;
1812            self.n_to_pl_b[(distance & 0xff) as usize] =
1813                self.n_to_pl_b[(distance & 0xff) as usize].wrapping_add(1);
1814            distance += 1;
1815            if distance & 0xff == 0 {
1816                corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
1817            } else {
1818                break;
1819            }
1820        }
1821
1822        self.ch_set_b[idx] = self.ch_set_b[new_distance_place];
1823        self.ch_set_b[new_distance_place] = distance as u16;
1824
1825        distance = ((distance & 0xff00) | (self.bits.get_bits()? >> 8)) >> 1;
1826        self.bits.add_bits(7);
1827
1828        let old_avr3 = self.avr_ln3;
1829        if length != 1 && length != 4 {
1830            if length == 0 && distance <= self.max_dist3 {
1831                self.avr_ln3 += 1;
1832                self.avr_ln3 -= self.avr_ln3 >> 8;
1833            } else if self.avr_ln3 > 0 {
1834                self.avr_ln3 -= 1;
1835            }
1836        }
1837        length += 3;
1838        if distance >= self.max_dist3 {
1839            length += 1;
1840        }
1841        if distance <= 256 {
1842            length += 8;
1843        }
1844        if old_avr3 > 0xb0 || (self.avr_plc >= 0x2a00 && old_avr2 < 0x40) {
1845            self.max_dist3 = 0x7f00;
1846        } else {
1847            self.max_dist3 = 0x2001;
1848        }
1849
1850        self.remember_match(distance, length);
1851        self.copy_string(distance, length, out)
1852    }
1853
1854    fn huff_decode(&mut self, out: &mut impl Write) -> Result<()> {
1855        let bit_field = self.bits.get_bits()?;
1856
1857        let mut byte_place = if self.avr_plc > 0x75ff {
1858            self.decode_num(bit_field, 8, DEC_HF4, POS_HF4)
1859        } else if self.avr_plc > 0x5dff {
1860            self.decode_num(bit_field, 6, DEC_HF3, POS_HF3)
1861        } else if self.avr_plc > 0x35ff {
1862            self.decode_num(bit_field, 5, DEC_HF2, POS_HF2)
1863        } else if self.avr_plc > 0x0dff {
1864            self.decode_num(bit_field, 5, DEC_HF1, POS_HF1)
1865        } else {
1866            self.decode_num(bit_field, 4, DEC_HF0, POS_HF0)
1867        } & 0xff;
1868
1869        if self.st_mode {
1870            if byte_place == 0 && bit_field > 0x0fff {
1871                byte_place = 0x100;
1872            }
1873            if byte_place == 0 {
1874                let bit_field = self.bits.get_bits()?;
1875                self.bits.add_bits(1);
1876                if bit_field & 0x8000 != 0 {
1877                    self.num_huf = 0;
1878                    self.st_mode = false;
1879                    return Ok(());
1880                }
1881
1882                let length = if bit_field & 0x4000 != 0 { 4 } else { 3 };
1883                self.bits.add_bits(1);
1884                let mut distance = self.decode_num(self.bits.get_bits()?, 5, DEC_HF2, POS_HF2);
1885                distance = (distance << 5) | (self.bits.get_bits()? >> 11);
1886                self.bits.add_bits(5);
1887                self.copy_string(distance, length, out)?;
1888                return Ok(());
1889            }
1890            byte_place -= 1;
1891        } else {
1892            if self.num_huf >= 16 && self.flags_cnt == 0 {
1893                self.st_mode = true;
1894            }
1895            self.num_huf += 1;
1896        }
1897
1898        self.avr_plc += byte_place;
1899        self.avr_plc -= self.avr_plc >> 8;
1900        self.nhfb += 16;
1901        if self.nhfb > 0xff {
1902            self.nhfb = 0x90;
1903            self.nlzb >>= 1;
1904        }
1905
1906        let byte = (self.ch_set[byte_place as usize] >> 8) as u8;
1907        self.put_byte(byte, out)?;
1908
1909        let idx = byte_place as usize;
1910        let mut cur_byte;
1911        let mut new_byte_place;
1912        loop {
1913            cur_byte = self.ch_set[idx] as u32;
1914            new_byte_place = self.n_to_pl[(cur_byte & 0xff) as usize] as usize;
1915            self.n_to_pl[(cur_byte & 0xff) as usize] =
1916                self.n_to_pl[(cur_byte & 0xff) as usize].wrapping_add(1);
1917            cur_byte += 1;
1918            if cur_byte & 0xff > 0xa1 {
1919                corr_huff(&mut self.ch_set, &mut self.n_to_pl);
1920            } else {
1921                break;
1922            }
1923        }
1924
1925        self.ch_set[idx] = self.ch_set[new_byte_place];
1926        self.ch_set[new_byte_place] = cur_byte as u16;
1927        Ok(())
1928    }
1929
1930    fn get_flags_buf(&mut self) -> Result<()> {
1931        let flags_place = self.decode_num(self.bits.get_bits()?, 5, DEC_HF2, POS_HF2) as usize;
1932        if flags_place >= self.ch_set_c.len() {
1933            return Ok(());
1934        }
1935
1936        let mut flags;
1937        let mut new_flags_place;
1938        loop {
1939            flags = self.ch_set_c[flags_place] as u32;
1940            new_flags_place = self.n_to_pl_c[(flags & 0xff) as usize] as usize;
1941            self.n_to_pl_c[(flags & 0xff) as usize] =
1942                self.n_to_pl_c[(flags & 0xff) as usize].wrapping_add(1);
1943            self.flag_buf = flags >> 8;
1944            flags += 1;
1945            if flags & 0xff == 0 {
1946                corr_huff(&mut self.ch_set_c, &mut self.n_to_pl_c);
1947            } else {
1948                break;
1949            }
1950        }
1951
1952        self.ch_set_c[flags_place] = self.ch_set_c[new_flags_place];
1953        self.ch_set_c[new_flags_place] = flags as u16;
1954        Ok(())
1955    }
1956
1957    fn decode_num(
1958        &mut self,
1959        num: u32,
1960        mut start_pos: u32,
1961        dec_tab: &[u16],
1962        pos_tab: &[u16],
1963    ) -> u32 {
1964        let num = num & 0xfff0;
1965        let mut i = 0usize;
1966        while dec_tab[i] as u32 <= num {
1967            start_pos += 1;
1968            i += 1;
1969        }
1970        self.bits.add_bits(start_pos as usize);
1971        ((num - if i > 0 { dec_tab[i - 1] as u32 } else { 0 }) >> (16 - start_pos))
1972            + pos_tab[start_pos as usize] as u32
1973    }
1974
1975    fn copy_string(&mut self, distance: u32, length: u32, out: &mut impl Write) -> Result<()> {
1976        if self.output_written + length as usize > self.target {
1977            return Err(Error::InvalidData("RAR 1.3 match exceeds output size"));
1978        }
1979
1980        if (!self.first_win_done && distance as usize > self.unp_ptr)
1981            || distance as usize > 0x10000
1982            || distance == 0
1983        {
1984            for _ in 0..length {
1985                self.put_byte(0, out)?;
1986            }
1987        } else {
1988            for _ in 0..length {
1989                let byte = self.window[(self.unp_ptr.wrapping_sub(distance as usize)) & 0xffff];
1990                self.put_byte(byte, out)?;
1991            }
1992        }
1993        Ok(())
1994    }
1995
1996    fn put_byte(&mut self, byte: u8, out: &mut impl Write) -> Result<()> {
1997        if self.output_written >= self.target {
1998            return Err(Error::InvalidData("RAR 1.3 literal exceeds output size"));
1999        }
2000        self.window[self.unp_ptr] = byte;
2001        self.unp_ptr = (self.unp_ptr + 1) & 0xffff;
2002        out.write_all(&[byte])
2003            .map_err(|_| Error::InvalidData("RAR 1.3 output write failed"))?;
2004        self.output_written += 1;
2005        Ok(())
2006    }
2007
2008    fn remember_match(&mut self, distance: u32, length: u32) {
2009        self.old_dist[self.old_dist_ptr] = distance;
2010        self.old_dist_ptr = (self.old_dist_ptr + 1) & 3;
2011        self.last_length = length;
2012        self.last_dist = distance;
2013    }
2014
2015    fn short_len1(&self, pos: usize) -> u32 {
2016        if pos == 1 {
2017            self.buf60 + 3
2018        } else {
2019            SHORT_LEN1[pos] as u32
2020        }
2021    }
2022
2023    fn short_len2(&self, pos: usize) -> u32 {
2024        if pos == 3 {
2025            self.buf60 + 3
2026        } else {
2027            SHORT_LEN2[pos] as u32
2028        }
2029    }
2030
2031    fn init_huff(&mut self) {
2032        for i in 0..256 {
2033            self.ch_set[i] = (i as u16) << 8;
2034            self.ch_set_b[i] = (i as u16) << 8;
2035            self.ch_set_a[i] = i as u16;
2036            self.ch_set_c[i] = (0u8.wrapping_sub(i as u8) as u16) << 8;
2037        }
2038        self.n_to_pl = [0; 256];
2039        self.n_to_pl_b = [0; 256];
2040        self.n_to_pl_c = [0; 256];
2041        corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
2042    }
2043}
2044
2045impl Default for Unpack15 {
2046    fn default() -> Self {
2047        Self::new()
2048    }
2049}
2050
2051fn corr_huff(char_set: &mut [u16; 256], num_to_place: &mut [u8; 256]) {
2052    let mut pos = 0usize;
2053    for rank in (0..=7).rev() {
2054        for _ in 0..32 {
2055            char_set[pos] = (char_set[pos] & !0xff) | rank;
2056            pos += 1;
2057        }
2058    }
2059    *num_to_place = [0; 256];
2060    for rank in (0..=6).rev() {
2061        num_to_place[rank] = ((7 - rank) * 32) as u8;
2062    }
2063}
2064
2065#[cfg(test)]
2066mod tests {
2067    use super::{
2068        decode_num_bit_cost, decode_num_prefix_is_stable, find_long_lz, find_lz_token,
2069        find_old_dist_lz, long_lz_buckets, should_lazy_emit_literal, unpack15_decode,
2070        unpack15_encode, EncodeOptions, EncodedToken, LongLz, LzPlanState, OldDistLz, ShortLz,
2071        Unpack15, Unpack15Encoder, DEC_HF0, DEC_HF1, DEC_HF2, DEC_HF3, DEC_HF4, DEC_L1, DEC_L2,
2072        POS_HF0, POS_HF1, POS_HF2, POS_HF3, POS_HF4, POS_L1, POS_L2,
2073    };
2074
2075    fn brute_decode_num_bit_cost(
2076        target: u32,
2077        start_pos: u32,
2078        dec_tab: &[u16],
2079        pos_tab: &[u16],
2080    ) -> Option<usize> {
2081        for len in start_pos as usize..=16 {
2082            for code in 0..(1u32 << len) {
2083                if decode_num_prefix_is_stable(code, len, target, start_pos, dec_tab, pos_tab) {
2084                    return Some(len);
2085                }
2086            }
2087        }
2088        None
2089    }
2090
2091    #[test]
2092    fn decode_member_from_reader_accepts_incremental_input() {
2093        struct TinyReader<'a> {
2094            input: &'a [u8],
2095        }
2096
2097        impl std::io::Read for TinyReader<'_> {
2098            fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
2099                if self.input.is_empty() {
2100                    return Ok(0);
2101                }
2102                let len = self.input.len().min(out.len()).min(2);
2103                out[..len].copy_from_slice(&self.input[..len]);
2104                self.input = &self.input[len..];
2105                Ok(len)
2106            }
2107        }
2108
2109        let expected = b"RAR 1.4 incremental input fixture\n".repeat(32);
2110        let packed = unpack15_encode(&expected).unwrap();
2111        assert_eq!(unpack15_decode(&packed, expected.len()).unwrap(), expected);
2112
2113        let mut reader = TinyReader { input: &packed };
2114        let mut decoder = Unpack15::new();
2115        let mut output = Vec::new();
2116        decoder
2117            .decode_member_from_reader(&mut reader, expected.len(), false, &mut output)
2118            .unwrap();
2119
2120        assert_eq!(output, expected);
2121    }
2122
2123    #[test]
2124    fn decode_num_bit_cost_matches_prefix_search_at_table_boundaries() {
2125        let tables = [
2126            (4, DEC_HF0, POS_HF0),
2127            (5, DEC_HF1, POS_HF1),
2128            (5, DEC_HF2, POS_HF2),
2129            (6, DEC_HF3, POS_HF3),
2130            (8, DEC_HF4, POS_HF4),
2131            (2, DEC_L1, POS_L1),
2132            (3, DEC_L2, POS_L2),
2133        ];
2134        let targets = [0, 1, 2, 3, 7, 8, 16, 24, 32, 33, 53, 117, 233, 255, 256];
2135
2136        for (start_pos, dec_tab, pos_tab) in tables {
2137            for target in targets {
2138                assert_eq!(
2139                    decode_num_bit_cost(target, start_pos, dec_tab, pos_tab),
2140                    brute_decode_num_bit_cost(target, start_pos, dec_tab, pos_tab),
2141                    "target {target}, start_pos {start_pos}"
2142                );
2143            }
2144        }
2145    }
2146
2147    #[test]
2148    fn encoder_emits_rar15_very_long_lz_matches() {
2149        let mut input: Vec<_> = (0u8..=255).cycle().take(300).collect();
2150        input.extend_from_within(..258);
2151
2152        assert_eq!(
2153            find_long_lz(&input, 300, 0x8000),
2154            Some(LongLz {
2155                distance: 300,
2156                length: 258
2157            })
2158        );
2159        let packed = unpack15_encode(&input).unwrap();
2160
2161        assert!(
2162            packed.len() < 330,
2163            "very-long LongLZ should encode a 258-byte repeat compactly, got {} bytes",
2164            packed.len()
2165        );
2166        assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2167    }
2168
2169    #[test]
2170    fn encoder_adjusts_rar15_long_lz_length_for_far_distance_bonus() {
2171        let mut input: Vec<_> = (0..9000).map(|index| (index * 73 + 19) as u8).collect();
2172        input.extend_from_within(..10);
2173
2174        let packed = unpack15_encode(&input).unwrap();
2175
2176        assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2177    }
2178
2179    #[test]
2180    fn encoder_reuses_rar15_repeat_last_token() {
2181        let input = b"abcdefghijklmnop".repeat(64);
2182        let packed = unpack15_encode(&input).unwrap();
2183
2184        assert!(
2185            packed.len() < 100,
2186            "repeat-last tokens should keep a simple repeated pattern compact, got {} bytes",
2187            packed.len()
2188        );
2189        assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2190    }
2191
2192    #[test]
2193    fn old_distance_finder_maps_ring_entries_to_short_lz_codes() {
2194        let mut input: Vec<_> = (0..80).map(|index| (index * 37 + 11) as u8).collect();
2195        let pos = input.len();
2196        input.extend_from_within(pos - 33..pos - 13);
2197
2198        assert_eq!(
2199            find_old_dist_lz(&input, pos, [11, 22, 33, 44], 0, 0x2001),
2200            Some(OldDistLz {
2201                distance: 33,
2202                length: 20,
2203                short_code: 11,
2204            })
2205        );
2206    }
2207
2208    #[test]
2209    fn old_distance_finder_rejects_buf60_toggle_encoding() {
2210        let mut input = b"abcd".repeat(128);
2211        let pos = input.len();
2212        input.extend((0..257).map(|index| b"abcd"[index % 4]));
2213
2214        assert_eq!(
2215            find_old_dist_lz(&input, pos, [u32::MAX, 4, u32::MAX, u32::MAX], 2, 0x2001),
2216            None,
2217            "short-code 10 with decoded base length 0x101 is the Buf60 toggle, not a match"
2218        );
2219    }
2220
2221    #[test]
2222    fn planner_emits_safe_old_distance_token() {
2223        let mut input: Vec<_> = (0..80).map(|index| (index * 37 + 11) as u8).collect();
2224        let pos = input.len();
2225        input.extend_from_within(pos - 33..pos - 13);
2226
2227        let encoder = Unpack15Encoder::new();
2228        let buckets = long_lz_buckets(&input);
2229        let token = encoder
2230            .choose_lz_token(
2231                &input,
2232                pos,
2233                &buckets,
2234                LzPlanState {
2235                    last_dist: u32::MAX,
2236                    last_length: 0,
2237                    old_dist: [11, 22, 33, 44],
2238                    old_dist_ptr: 0,
2239                    max_dist3: 0x2001,
2240                    nlzb: encoder.nlzb,
2241                    nhfb: encoder.nhfb,
2242                    l_count: encoder.l_count,
2243                },
2244                0,
2245            )
2246            .expect("old-distance candidate should be selected");
2247
2248        assert_eq!(
2249            token,
2250            EncodedToken::OldDist(OldDistLz {
2251                distance: 33,
2252                length: 20,
2253                short_code: 11,
2254            })
2255        );
2256    }
2257
2258    #[test]
2259    fn encoder_exits_stmode_when_literal_runs_trigger_decoder_mode() {
2260        let input: Vec<_> = (0..96).map(|index| (index * 73 + 19) as u8).collect();
2261        let packed = unpack15_encode(&input).unwrap();
2262
2263        assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2264    }
2265
2266    #[test]
2267    fn encoder_emits_stmode_literals_for_long_literal_runs() {
2268        let input: Vec<_> = (0..128).map(|index| (index * 73 + 19) as u8).collect();
2269        let mut encoder = Unpack15Encoder::new();
2270        let packed = encoder.encode_member(&input).unwrap();
2271
2272        assert!(
2273            encoder.stmode_literal_count > 0,
2274            "long literal runs should use stmode literals before exiting stmode"
2275        );
2276        assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2277    }
2278
2279    #[test]
2280    fn encoder_options_can_disable_stmode_literal_runs() {
2281        let input: Vec<_> = (0..128).map(|index| (index * 73 + 19) as u8).collect();
2282        let mut encoder =
2283            Unpack15Encoder::with_options(EncodeOptions::new().with_stmode_literal_runs(false));
2284        let packed = encoder.encode_member(&input).unwrap();
2285
2286        assert_eq!(encoder.stmode_literal_count, 0);
2287        assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2288    }
2289
2290    #[test]
2291    fn encoder_options_bound_long_lz_search_distance() {
2292        let mut input: Vec<_> = (0u8..=255).cycle().take(300).collect();
2293        input.extend_from_within(..64);
2294
2295        assert_eq!(find_long_lz(&input, 300, 256), None);
2296        assert_eq!(
2297            find_long_lz(&input, 300, 0x8000),
2298            Some(LongLz {
2299                distance: 300,
2300                length: 64
2301            })
2302        );
2303    }
2304
2305    #[test]
2306    fn long_lz_search_rejects_unencodable_32k_distance() {
2307        let mut input = Vec::with_capacity(0x8000 + 64);
2308        let mut state = 0x1234_5678u32;
2309        for _ in 0..0x8000 + 64 {
2310            state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
2311            input.push((state >> 24) as u8);
2312        }
2313        let repeated = input[..64].to_vec();
2314        input[0x8000..0x8000 + 64].copy_from_slice(&repeated);
2315
2316        let token = find_long_lz(&input, 0x8000, 0x8000);
2317
2318        assert_ne!(token.map(|token| token.distance), Some(0x8000));
2319    }
2320
2321    #[test]
2322    fn lazy_match_prefers_longer_next_position_match() {
2323        let input = b"abcXbcQRSTabcQRSTUV";
2324        let buckets = long_lz_buckets(input);
2325        let token = find_lz_token(
2326            input,
2327            10,
2328            &buckets,
2329            LzPlanState {
2330                last_dist: u32::MAX,
2331                last_length: 0,
2332                old_dist: [u32::MAX; 4],
2333                old_dist_ptr: 0,
2334                max_dist3: 0x2001,
2335                nlzb: 0,
2336                nhfb: 0,
2337                l_count: 0,
2338            },
2339            EncodeOptions::default(),
2340        )
2341        .unwrap();
2342
2343        assert!(matches!(
2344            token,
2345            EncodedToken::ShortLz(super::ShortLz { length: 3, .. })
2346        ));
2347        assert!(should_lazy_emit_literal(
2348            input,
2349            10,
2350            &buckets,
2351            token,
2352            0x2001,
2353            EncodeOptions::default()
2354        ));
2355
2356        let packed = unpack15_encode(input).unwrap();
2357        assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2358    }
2359
2360    #[test]
2361    fn cost_aware_selection_prefers_better_bits_per_byte_token() {
2362        let mut input = vec![b'Z'; 40];
2363        input[7] = b'A';
2364        input[8] = b'A';
2365        input[9] = b'A';
2366        input[10] = b'B';
2367        input[39] = b'A';
2368        let pos = input.len();
2369        input.extend_from_slice(b"AAAAAAAAAA");
2370
2371        let mut encoder = Unpack15Encoder::new();
2372        encoder.old_dist = [u32::MAX, u32::MAX, u32::MAX, 33];
2373        let buckets = long_lz_buckets(&input);
2374        let token = encoder
2375            .choose_lz_token(
2376                &input,
2377                pos,
2378                &buckets,
2379                LzPlanState {
2380                    last_dist: u32::MAX,
2381                    last_length: 0,
2382                    old_dist: encoder.old_dist,
2383                    old_dist_ptr: encoder.old_dist_ptr,
2384                    max_dist3: encoder.max_dist3,
2385                    nlzb: encoder.nlzb,
2386                    nhfb: encoder.nhfb,
2387                    l_count: encoder.l_count,
2388                },
2389                0,
2390            )
2391            .unwrap();
2392
2393        assert_eq!(
2394            token,
2395            EncodedToken::ShortLz(ShortLz {
2396                distance: 1,
2397                length: 10,
2398            })
2399        );
2400    }
2401
2402    #[test]
2403    fn planner_uses_simulated_max_dist3_for_old_distance_candidates() {
2404        let mut state = 0x1234_5678u32;
2405        let mut input = Vec::with_capacity(9004);
2406        for _ in 0..9004 {
2407            state ^= state << 13;
2408            state ^= state >> 17;
2409            state ^= state << 5;
2410            input.push(state as u8);
2411        }
2412        let pos = 9000;
2413        let prefix = [input[0], input[1], input[2]];
2414        input[pos..pos + 3].copy_from_slice(&prefix);
2415        input[pos + 3] = input[3].wrapping_add(1);
2416
2417        let mut encoder = Unpack15Encoder::new();
2418        encoder.max_dist3 = 0x7f00;
2419        let buckets = long_lz_buckets(&input);
2420        let token = encoder.choose_lz_token(
2421            &input,
2422            pos,
2423            &buckets,
2424            LzPlanState {
2425                last_dist: u32::MAX,
2426                last_length: 0,
2427                old_dist: [u32::MAX, u32::MAX, u32::MAX, 9000],
2428                old_dist_ptr: 0,
2429                max_dist3: 0x2001,
2430                nlzb: encoder.nlzb,
2431                nhfb: encoder.nhfb,
2432                l_count: encoder.l_count,
2433            },
2434            0,
2435        );
2436
2437        assert_eq!(token, None);
2438    }
2439
2440    #[test]
2441    fn encoder_round_trips_source_shaped_payload() {
2442        let source = include_bytes!("rar13.rs");
2443        let input = &source[..source.len().min(50_902)];
2444
2445        let packed = unpack15_encode(input).unwrap();
2446        let decoded = unpack15_decode(&packed, input.len()).unwrap();
2447
2448        let first_diff = decoded
2449            .iter()
2450            .zip(input)
2451            .position(|(actual, expected)| actual != expected);
2452        assert_eq!(first_diff, None, "first differing byte in decoded payload");
2453        assert_eq!(decoded, input);
2454    }
2455}
2456
2457#[derive(Clone)]
2458struct BitReader {
2459    input: Vec<u8>,
2460    bit_pos: usize,
2461    final_input: bool,
2462}
2463
2464impl BitReader {
2465    fn new(input: &[u8]) -> Self {
2466        Self {
2467            input: input.to_vec(),
2468            bit_pos: 0,
2469            final_input: false,
2470        }
2471    }
2472
2473    fn new_final(input: &[u8]) -> Self {
2474        Self {
2475            input: input.to_vec(),
2476            bit_pos: 0,
2477            final_input: true,
2478        }
2479    }
2480
2481    fn append(&mut self, input: &[u8]) {
2482        self.compact();
2483        self.input.extend_from_slice(input);
2484    }
2485
2486    fn finish(&mut self) {
2487        self.final_input = true;
2488    }
2489
2490    fn compact(&mut self) {
2491        let bytes = self.bit_pos / 8;
2492        if bytes == 0 {
2493            return;
2494        }
2495        self.input.drain(..bytes);
2496        self.bit_pos -= bytes * 8;
2497    }
2498
2499    fn get_bits(&self) -> Result<u32> {
2500        let mut value = 0u32;
2501        for i in 0..16 {
2502            value <<= 1;
2503            let bit_index = self.bit_pos + i;
2504            let byte = match self.input.get(bit_index / 8).copied() {
2505                Some(byte) => byte,
2506                None if self.final_input => 0,
2507                None => return Err(Error::NeedMoreInput),
2508            };
2509            value |= ((byte >> (7 - (bit_index % 8))) & 1) as u32;
2510        }
2511        Ok(value)
2512    }
2513
2514    fn add_bits(&mut self, count: usize) {
2515        self.bit_pos += count;
2516    }
2517}
2518
2519#[derive(Default)]
2520struct BitWriter {
2521    output: Vec<u8>,
2522    bit_pos: usize,
2523}
2524
2525impl BitWriter {
2526    fn new() -> Self {
2527        Self {
2528            output: Vec::new(),
2529            bit_pos: 0,
2530        }
2531    }
2532
2533    fn write_bits(&mut self, value: u32, count: usize) {
2534        for i in (0..count).rev() {
2535            let bit = ((value >> i) & 1) as u8;
2536            if self.bit_pos.is_multiple_of(8) {
2537                self.output.push(0);
2538            }
2539            if bit != 0 {
2540                let idx = self.output.len() - 1;
2541                self.output[idx] |= 1 << (7 - (self.bit_pos % 8));
2542            }
2543            self.bit_pos += 1;
2544        }
2545    }
2546
2547    fn finish(self) -> Vec<u8> {
2548        self.output
2549    }
2550}