Skip to main content

rars_codec/
rar13.rs

1use crate::{Error, Result};
2use std::io::{Read, Write};
3
4const MATCH_HASH_BUCKETS: usize = 4096;
5const MAX_LONG_MATCH_CANDIDATES: usize = 64;
6const MAX_LONG_LZ_DISTANCE: usize = 0x7fff;
7
8const DEC_L1: &[u16] = &[
9    0x8000, 0xa000, 0xc000, 0xd000, 0xe000, 0xea00, 0xee00, 0xf000, 0xf200, 0xf200, 0xffff,
10];
11const POS_L1: &[u16] = &[0, 0, 0, 2, 3, 5, 7, 11, 16, 20, 24, 32, 32];
12const DEC_L2: &[u16] = &[
13    0xa000, 0xc000, 0xd000, 0xe000, 0xea00, 0xee00, 0xf000, 0xf200, 0xf240, 0xffff,
14];
15const POS_L2: &[u16] = &[0, 0, 0, 0, 5, 7, 9, 13, 18, 22, 26, 34, 36];
16const DEC_HF0: &[u16] = &[
17    0x8000, 0xc000, 0xe000, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xffff,
18];
19const POS_HF0: &[u16] = &[0, 0, 0, 0, 0, 8, 16, 24, 33, 33, 33, 33, 33];
20const DEC_HF1: &[u16] = &[
21    0x2000, 0xc000, 0xe000, 0xf000, 0xf200, 0xf200, 0xf7e0, 0xffff,
22];
23const POS_HF1: &[u16] = &[0, 0, 0, 0, 0, 0, 4, 44, 60, 76, 80, 80, 127];
24const DEC_HF2: &[u16] = &[
25    0x1000, 0x2400, 0x8000, 0xc000, 0xfa00, 0xffff, 0xffff, 0xffff,
26];
27const POS_HF2: &[u16] = &[0, 0, 0, 0, 0, 0, 2, 7, 53, 117, 233, 0, 0];
28const DEC_HF3: &[u16] = &[0x0800, 0x2400, 0xee00, 0xfe80, 0xffff, 0xffff, 0xffff];
29const POS_HF3: &[u16] = &[0, 0, 0, 0, 0, 0, 0, 2, 16, 218, 251, 0, 0];
30const DEC_HF4: &[u16] = &[0xff00, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff];
31const POS_HF4: &[u16] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0];
32
33const SHORT_LEN1: [u8; 16] = [1, 3, 4, 4, 5, 6, 7, 8, 8, 4, 4, 5, 6, 6, 4, 0];
34const SHORT_XOR1: [u8; 15] = [
35    0x00, 0xa0, 0xd0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff, 0xc0, 0x80, 0x90, 0x98, 0x9c, 0xb0,
36];
37const SHORT_LEN2: [u8; 16] = [2, 3, 3, 3, 4, 4, 5, 6, 6, 4, 4, 5, 6, 6, 4, 0];
38const SHORT_XOR2: [u8; 15] = [
39    0x00, 0x40, 0x60, 0xa0, 0xd0, 0xe0, 0xf0, 0xf8, 0xfc, 0xc0, 0x80, 0x90, 0x98, 0x9c, 0xb0,
40];
41
42pub fn unpack15_encode(input: &[u8]) -> Result<Vec<u8>> {
43    unpack15_encode_with_options(input, EncodeOptions::default())
44}
45
46pub fn unpack15_encode_with_options(input: &[u8], options: EncodeOptions) -> Result<Vec<u8>> {
47    if input.is_empty() {
48        return Ok(Vec::new());
49    }
50
51    let mut encoder = Unpack15Encoder::with_options(options);
52    encoder.encode_member(input)
53}
54
55pub fn unpack15_decode(input: &[u8], output_size: usize) -> Result<Vec<u8>> {
56    let mut decoder = Unpack15::new();
57    decoder.decode_member(input, output_size, false)
58}
59
60pub struct Unpack15Encoder {
61    bits: BitWriter,
62    options: EncodeOptions,
63    // State names follow RAR13_FORMAT_SPECIFICATION.md ยง6 so the codec state
64    // lines up directly with the documented Unpack15 tables and traces.
65    ch_set: [u16; 256],
66    ch_set_c: [u16; 256],
67    ch_set_b: [u16; 256],
68    n_to_pl: [u8; 256],
69    n_to_pl_b: [u8; 256],
70    n_to_pl_c: [u8; 256],
71    ch_set_a: [u16; 256],
72    avr_plc: u32,
73    avr_plc_b: u32,
74    avr_ln1: u32,
75    avr_ln2: u32,
76    avr_ln3: u32,
77    max_dist3: u32,
78    nhfb: u32,
79    nlzb: u32,
80    num_huf: u32,
81    old_dist: [u32; 4],
82    old_dist_ptr: usize,
83    last_dist: u32,
84    last_length: u32,
85    l_count: u32,
86    #[cfg(test)]
87    stmode_literal_count: usize,
88}
89
90#[derive(Debug, Clone, Copy, PartialEq, Eq)]
91pub struct EncodeOptions {
92    old_distance_tokens: bool,
93    lazy_matching: bool,
94    stmode_literal_runs: bool,
95    max_long_match_distance: usize,
96}
97
98impl EncodeOptions {
99    pub const fn new() -> Self {
100        Self {
101            old_distance_tokens: true,
102            lazy_matching: true,
103            stmode_literal_runs: true,
104            max_long_match_distance: MAX_LONG_LZ_DISTANCE,
105        }
106    }
107
108    pub const fn with_old_distance_tokens(mut self, enabled: bool) -> Self {
109        self.old_distance_tokens = enabled;
110        self
111    }
112
113    pub const fn with_lazy_matching(mut self, enabled: bool) -> Self {
114        self.lazy_matching = enabled;
115        self
116    }
117
118    pub const fn with_stmode_literal_runs(mut self, enabled: bool) -> Self {
119        self.stmode_literal_runs = enabled;
120        self
121    }
122
123    pub const fn with_max_long_match_distance(mut self, distance: usize) -> Self {
124        self.max_long_match_distance = distance;
125        self
126    }
127
128    pub const fn old_distance_tokens_enabled(self) -> bool {
129        self.old_distance_tokens
130    }
131}
132
133impl Default for EncodeOptions {
134    fn default() -> Self {
135        Self::new()
136    }
137}
138
139impl Unpack15Encoder {
140    pub fn new() -> Self {
141        Self::with_options(EncodeOptions::default())
142    }
143
144    pub fn with_options(options: EncodeOptions) -> Self {
145        let mut encoder = Self {
146            bits: BitWriter::new(),
147            options,
148            ch_set: [0; 256],
149            ch_set_c: [0; 256],
150            ch_set_b: [0; 256],
151            n_to_pl: [0; 256],
152            n_to_pl_b: [0; 256],
153            n_to_pl_c: [0; 256],
154            ch_set_a: [0; 256],
155            avr_plc: 0x3500,
156            avr_plc_b: 0,
157            avr_ln1: 0,
158            avr_ln2: 0,
159            avr_ln3: 0,
160            max_dist3: 0x2001,
161            nhfb: 0x80,
162            nlzb: 0x80,
163            num_huf: 0,
164            old_dist: [u32::MAX; 4],
165            old_dist_ptr: 0,
166            last_dist: u32::MAX,
167            last_length: 0,
168            l_count: 0,
169            #[cfg(test)]
170            stmode_literal_count: 0,
171        };
172        encoder.init_huff();
173        encoder
174    }
175
176    pub fn encode_literals_only(mut self, input: &[u8]) -> Result<Vec<u8>> {
177        self.encode_literals_only_member(input)
178    }
179
180    fn encode_literals_only_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
181        if input.is_empty() {
182            return Ok(Vec::new());
183        }
184        self.bits = BitWriter::new();
185        let mut pos = 0usize;
186        while pos < input.len() {
187            let mut flags = 0u8;
188            let mut flag_bits = 0usize;
189            let mut payloads = Vec::new();
190            let mut plan_nhfb = self.nhfb;
191            let mut plan_nlzb = self.nlzb;
192            let mut plan_num_huf = self.num_huf;
193            let mut group_enters_stmode = false;
194
195            while flag_bits < 8 && pos < input.len() {
196                let flag = huff_flag_bits(plan_nlzb <= plan_nhfb);
197                if flag_bits + flag.len() > 8 {
198                    break;
199                }
200                write_planned_flag_bits(&mut flags, flag_bits, flag);
201                payloads.push(EncodedToken::Literal(input[pos]));
202                flag_bits += flag.len();
203                if flag_bits == 8 && plan_num_huf >= 16 && pos + 1 < input.len() {
204                    group_enters_stmode = true;
205                }
206                plan_num_huf += 1;
207                pos += 1;
208                plan_huff_effect(&mut plan_nhfb, &mut plan_nlzb);
209            }
210
211            self.emit_flags_byte(flags)?;
212            self.emit_payloads(payloads)?;
213            if group_enters_stmode {
214                if self.options.stmode_literal_runs {
215                    self.emit_stmode_literal_run(input, &[], &mut pos, false)?;
216                }
217                self.emit_stmode_exit()?;
218            }
219        }
220        Ok(std::mem::take(&mut self.bits).finish())
221    }
222
223    pub fn encode_member(&mut self, input: &[u8]) -> Result<Vec<u8>> {
224        if input.is_empty() {
225            return Ok(Vec::new());
226        }
227        self.bits = BitWriter::new();
228        let buckets = long_lz_buckets(input);
229        let mut pos = 0usize;
230        while pos < input.len() {
231            let mut flags = 0u8;
232            let mut flag_bits = 0usize;
233            let mut payloads = Vec::new();
234            let mut plan_encoder = self.clone_for_planning();
235            let mut group_enters_stmode = false;
236
237            while flag_bits < 8 && pos < input.len() {
238                let state = plan_encoder.lz_plan_state();
239                if let Some(token) = plan_encoder
240                    .choose_lz_token(input, pos, &buckets, state, flag_bits)
241                    .filter(|token| {
242                        !self.options.lazy_matching
243                            || !should_lazy_emit_literal(
244                                input,
245                                pos,
246                                &buckets,
247                                *token,
248                                state.max_dist3,
249                                self.options,
250                            )
251                    })
252                {
253                    let flag = token.flag_bits(state.nlzb, state.nhfb);
254                    let next_pos = pos + token.length() as usize;
255                    let leaves_unfillable_flag_bit =
256                        flag_bits + flag.len() == 7 && next_pos < input.len();
257                    if flag_bits + flag.len() <= 8 && !leaves_unfillable_flag_bit {
258                        write_planned_flag_bits(&mut flags, flag_bits, flag);
259                        flag_bits += flag.len();
260                        pos = next_pos;
261                        plan_encoder.emit_payloads(vec![token])?;
262                        payloads.push(token);
263                        continue;
264                    }
265                }
266
267                let flag = huff_flag_bits(plan_encoder.nlzb <= plan_encoder.nhfb);
268                if flag_bits + flag.len() > 8 {
269                    break;
270                }
271                write_planned_flag_bits(&mut flags, flag_bits, flag);
272                let literal = input[pos];
273                payloads.push(EncodedToken::Literal(input[pos]));
274                flag_bits += flag.len();
275                if flag_bits == 8 && plan_encoder.num_huf >= 16 && pos + 1 < input.len() {
276                    group_enters_stmode = true;
277                }
278                pos += 1;
279                plan_encoder.emit_literal(literal)?;
280            }
281
282            self.emit_flags_byte(flags)?;
283            self.emit_payloads(payloads)?;
284            if group_enters_stmode {
285                if self.options.stmode_literal_runs {
286                    self.emit_stmode_literal_run(input, &buckets, &mut pos, true)?;
287                }
288                self.emit_stmode_exit()?;
289            }
290        }
291        Ok(std::mem::take(&mut self.bits).finish())
292    }
293
294    fn clone_for_planning(&self) -> Self {
295        Self {
296            bits: BitWriter::new(),
297            options: self.options,
298            ch_set: self.ch_set,
299            ch_set_c: self.ch_set_c,
300            ch_set_b: self.ch_set_b,
301            n_to_pl: self.n_to_pl,
302            n_to_pl_b: self.n_to_pl_b,
303            n_to_pl_c: self.n_to_pl_c,
304            ch_set_a: self.ch_set_a,
305            avr_plc: self.avr_plc,
306            avr_plc_b: self.avr_plc_b,
307            avr_ln1: self.avr_ln1,
308            avr_ln2: self.avr_ln2,
309            avr_ln3: self.avr_ln3,
310            max_dist3: self.max_dist3,
311            nhfb: self.nhfb,
312            nlzb: self.nlzb,
313            num_huf: self.num_huf,
314            old_dist: self.old_dist,
315            old_dist_ptr: self.old_dist_ptr,
316            last_dist: self.last_dist,
317            last_length: self.last_length,
318            l_count: self.l_count,
319            #[cfg(test)]
320            stmode_literal_count: self.stmode_literal_count,
321        }
322    }
323
324    fn lz_plan_state(&self) -> LzPlanState {
325        LzPlanState {
326            last_dist: self.last_dist,
327            last_length: self.last_length,
328            old_dist: self.old_dist,
329            old_dist_ptr: self.old_dist_ptr,
330            max_dist3: self.max_dist3,
331            nlzb: self.nlzb,
332            nhfb: self.nhfb,
333            l_count: self.l_count,
334        }
335    }
336
337    fn choose_lz_token(
338        &self,
339        input: &[u8],
340        pos: usize,
341        buckets: &[Vec<usize>],
342        state: LzPlanState,
343        flag_bits: usize,
344    ) -> Option<EncodedToken> {
345        let candidates = find_lz_tokens(input, pos, buckets, state, self.options);
346        candidates
347            .into_iter()
348            .filter(|token| {
349                let flag_len = token.flag_bits(state.nlzb, state.nhfb).len();
350                let next_pos = pos + token.length() as usize;
351                next_pos == input.len()
352                    || (flag_bits + flag_len != 7
353                        && !(flag_len == 1 && flag_bits.is_multiple_of(2)))
354            })
355            .filter_map(|token| self.token_bit_cost(token, state).map(|cost| (token, cost)))
356            .min_by(|(left, left_cost), (right, right_cost)| {
357                let left_score = left_cost * 256 / left.length() as usize;
358                let right_score = right_cost * 256 / right.length() as usize;
359                left_score
360                    .cmp(&right_score)
361                    .then_with(|| right.length().cmp(&left.length()))
362            })
363            .map(|(token, _)| token)
364    }
365
366    fn token_bit_cost(&self, token: EncodedToken, state: LzPlanState) -> Option<usize> {
367        let flag_cost = token.flag_bits(state.nlzb, state.nhfb).len();
368        match token {
369            EncodedToken::Literal(byte) => {
370                let place = self
371                    .ch_set
372                    .iter()
373                    .position(|&value| (value >> 8) as u8 == byte)?;
374                Some(flag_cost + self.literal_place_bit_cost(place)?)
375            }
376            EncodedToken::RepeatLast(_) => {
377                Some(flag_cost + self.repeat_last_bit_cost(state.l_count))
378            }
379            EncodedToken::ShortLz(token) => {
380                let distance_value = token.distance.checked_sub(1)?;
381                let distance_place = self
382                    .ch_set_a
383                    .iter()
384                    .position(|&value| value as u32 == distance_value)?;
385                Some(
386                    flag_cost
387                        + l_count_break_bit_cost(state.l_count)
388                        + self.short_lz_prefix_bit_cost(token.length - 2)?
389                        + decode_num_bit_cost(distance_place as u32, 5, DEC_HF2, POS_HF2)?,
390                )
391            }
392            EncodedToken::OldDist(token) => {
393                let length_code = old_dist_lz_length_code(
394                    token.length,
395                    token.distance,
396                    state.max_dist3,
397                    token.short_code,
398                )?;
399                Some(
400                    flag_cost
401                        + l_count_break_bit_cost(state.l_count)
402                        + self.short_lz_prefix_bit_cost(token.short_code)?
403                        + decode_num_bit_cost(length_code, 2, DEC_L1, POS_L1)?,
404                )
405            }
406            EncodedToken::LongLz(token) => {
407                let length_code = long_lz_length_code_for_distance(token, state.max_dist3)?;
408                let distance_place = self.long_lz_distance_place(token.distance).ok()?;
409                Some(
410                    flag_cost
411                        + self.long_lz_length_bit_cost(length_code)?
412                        + self.long_lz_distance_bit_cost(distance_place)?
413                        + 7,
414                )
415            }
416        }
417    }
418
419    fn emit_payloads(&mut self, payloads: Vec<EncodedToken>) -> Result<()> {
420        for payload in payloads {
421            match payload {
422                EncodedToken::Literal(byte) => self.emit_literal(byte)?,
423                EncodedToken::ShortLz(short_lz) => {
424                    self.emit_short_lz(short_lz)?;
425                }
426                EncodedToken::RepeatLast(repeat) => {
427                    self.emit_repeat_last(repeat)?;
428                }
429                EncodedToken::OldDist(old_lz) => {
430                    self.emit_old_dist_lz(old_lz)?;
431                }
432                EncodedToken::LongLz(long_lz) => {
433                    self.emit_long_lz(long_lz)?;
434                }
435            }
436        }
437
438        Ok(())
439    }
440
441    fn emit_flags_byte(&mut self, flags: u8) -> Result<()> {
442        let flags_place = self
443            .ch_set_c
444            .iter()
445            .position(|&value| (value >> 8) as u8 == flags)
446            .ok_or(Error::InvalidData("RAR 1.3 flag byte is not encodable"))?;
447        emit_decode_num(&mut self.bits, flags_place as u32, 5, DEC_HF2, POS_HF2)?;
448
449        let mut cur_flags;
450        let mut new_flags_place;
451        loop {
452            cur_flags = self.ch_set_c[flags_place] as u32;
453            new_flags_place = self.n_to_pl_c[(cur_flags & 0xff) as usize] as usize;
454            self.n_to_pl_c[(cur_flags & 0xff) as usize] =
455                self.n_to_pl_c[(cur_flags & 0xff) as usize].wrapping_add(1);
456            cur_flags += 1;
457            if cur_flags & 0xff == 0 {
458                corr_huff(&mut self.ch_set_c, &mut self.n_to_pl_c);
459            } else {
460                break;
461            }
462        }
463
464        self.ch_set_c[flags_place] = self.ch_set_c[new_flags_place];
465        self.ch_set_c[new_flags_place] = cur_flags as u16;
466        Ok(())
467    }
468
469    fn emit_literal(&mut self, byte: u8) -> Result<()> {
470        let byte_place = self
471            .ch_set
472            .iter()
473            .position(|&value| (value >> 8) as u8 == byte)
474            .ok_or(Error::InvalidData("RAR 1.3 literal is not encodable"))?;
475        self.emit_literal_place(byte_place, byte_place, true)
476    }
477
478    fn emit_stmode_literal(&mut self, byte: u8) -> Result<()> {
479        let byte_place = self
480            .ch_set
481            .iter()
482            .position(|&value| (value >> 8) as u8 == byte)
483            .ok_or(Error::InvalidData(
484                "RAR 1.3 stmode literal is not encodable",
485            ))?;
486        #[cfg(test)]
487        {
488            self.stmode_literal_count += 1;
489        }
490        self.emit_literal_place(byte_place + 1, byte_place, false)
491    }
492
493    fn emit_stmode_literal_run(
494        &mut self,
495        input: &[u8],
496        buckets: &[Vec<usize>],
497        pos: &mut usize,
498        allow_lz: bool,
499    ) -> Result<()> {
500        while *pos + 1 < input.len() {
501            if allow_lz
502                && find_lz_token(
503                    input,
504                    *pos,
505                    buckets,
506                    LzPlanState {
507                        last_dist: self.last_dist,
508                        last_length: self.last_length,
509                        old_dist: self.old_dist,
510                        old_dist_ptr: self.old_dist_ptr,
511                        max_dist3: self.max_dist3,
512                        nlzb: self.nlzb,
513                        nhfb: self.nhfb,
514                        l_count: self.l_count,
515                    },
516                    self.options,
517                )
518                .is_some()
519            {
520                break;
521            }
522            self.emit_stmode_literal(input[*pos])?;
523            *pos += 1;
524        }
525        Ok(())
526    }
527
528    fn emit_literal_place(
529        &mut self,
530        encoded_place: usize,
531        decoded_place: usize,
532        update_num_huf: bool,
533    ) -> Result<()> {
534        if encoded_place > self.ch_set.len() || decoded_place >= self.ch_set.len() {
535            return Err(Error::InvalidData("RAR 1.3 literal is not encodable"));
536        }
537
538        let (start_pos, dec_tab, pos_tab) = if self.avr_plc > 0x75ff {
539            (8, DEC_HF4, POS_HF4)
540        } else if self.avr_plc > 0x5dff {
541            (6, DEC_HF3, POS_HF3)
542        } else if self.avr_plc > 0x35ff {
543            (5, DEC_HF2, POS_HF2)
544        } else if self.avr_plc > 0x0dff {
545            (5, DEC_HF1, POS_HF1)
546        } else {
547            (4, DEC_HF0, POS_HF0)
548        };
549        emit_decode_num(
550            &mut self.bits,
551            encoded_place as u32,
552            start_pos,
553            dec_tab,
554            pos_tab,
555        )?;
556
557        self.avr_plc += decoded_place as u32;
558        self.avr_plc -= self.avr_plc >> 8;
559        self.nhfb += 16;
560        if self.nhfb > 0xff {
561            self.nhfb = 0x90;
562            self.nlzb >>= 1;
563        }
564        if update_num_huf {
565            self.num_huf += 1;
566        }
567
568        let idx = decoded_place;
569        let mut cur_byte;
570        let mut new_byte_place;
571        loop {
572            cur_byte = self.ch_set[idx] as u32;
573            new_byte_place = self.n_to_pl[(cur_byte & 0xff) as usize] as usize;
574            self.n_to_pl[(cur_byte & 0xff) as usize] =
575                self.n_to_pl[(cur_byte & 0xff) as usize].wrapping_add(1);
576            cur_byte += 1;
577            if cur_byte & 0xff > 0xa1 {
578                corr_huff(&mut self.ch_set, &mut self.n_to_pl);
579            } else {
580                break;
581            }
582        }
583
584        self.ch_set[idx] = self.ch_set[new_byte_place];
585        self.ch_set[new_byte_place] = cur_byte as u16;
586        Ok(())
587    }
588
589    fn emit_short_lz(&mut self, short_lz: ShortLz) -> Result<()> {
590        self.num_huf = 0;
591        if self.l_count == 2 {
592            self.bits.write_bits(0, 1);
593            self.l_count = 0;
594        }
595        let length_place = short_lz.length - 2;
596        self.emit_short_lz_code(length_place as usize)?;
597        self.l_count = 0;
598
599        self.avr_ln1 += length_place;
600        self.avr_ln1 -= self.avr_ln1 >> 4;
601
602        let distance_value = short_lz.distance - 1;
603        let distance_place = self
604            .ch_set_a
605            .iter()
606            .position(|&value| value as u32 == distance_value)
607            .ok_or(Error::InvalidData(
608                "RAR 1.3 ShortLZ distance is not encodable",
609            ))?;
610        emit_decode_num(&mut self.bits, distance_place as u32, 5, DEC_HF2, POS_HF2)?;
611        if distance_place > 0 {
612            let last_distance = self.ch_set_a[distance_place - 1];
613            self.ch_set_a[distance_place] = last_distance;
614            self.ch_set_a[distance_place - 1] = distance_value as u16;
615        }
616        self.remember_match(short_lz.distance, short_lz.length);
617        Ok(())
618    }
619
620    fn emit_repeat_last(&mut self, repeat: RepeatLastLz) -> Result<()> {
621        if self.last_dist != repeat.distance || self.last_length != repeat.length {
622            return Err(Error::InvalidData(
623                "RAR 1.3 repeat-last state is not encodable",
624            ));
625        }
626        self.num_huf = 0;
627        if self.l_count == 2 {
628            self.bits.write_bits(1, 1);
629        } else {
630            self.emit_short_lz_code(9)?;
631            self.l_count += 1;
632        }
633        Ok(())
634    }
635
636    fn emit_old_dist_lz(&mut self, old_lz: OldDistLz) -> Result<()> {
637        self.num_huf = 0;
638        if self.l_count == 2 {
639            self.bits.write_bits(0, 1);
640            self.l_count = 0;
641        }
642        self.emit_short_lz_code(old_lz.short_code as usize)?;
643        self.l_count = 0;
644
645        let expected_distance = self.old_dist[(self
646            .old_dist_ptr
647            .wrapping_sub((old_lz.short_code - 9) as usize))
648            & 3];
649        if expected_distance != old_lz.distance {
650            return Err(Error::InvalidData(
651                "RAR 1.3 old-distance state is not encodable",
652            ));
653        }
654        let length_code = old_dist_lz_length_code(
655            old_lz.length,
656            old_lz.distance,
657            self.max_dist3,
658            old_lz.short_code,
659        )
660        .ok_or(Error::InvalidData(
661            "RAR 1.3 old-distance length is not encodable",
662        ))?;
663        emit_decode_num(&mut self.bits, length_code, 2, DEC_L1, POS_L1)?;
664        self.remember_match(old_lz.distance, old_lz.length);
665        Ok(())
666    }
667
668    fn emit_short_lz_code(&mut self, code: usize) -> Result<()> {
669        let (code_len, code_byte) = if self.avr_ln1 < 37 {
670            (self.short_len1(code), SHORT_XOR1[code])
671        } else {
672            (self.short_len2(code), SHORT_XOR2[code])
673        };
674        self.bits
675            .write_bits((code_byte >> (8 - code_len)) as u32, code_len as usize);
676        Ok(())
677    }
678
679    fn short_lz_prefix_bit_cost(&self, code: u32) -> Option<usize> {
680        let code = usize::try_from(code).ok()?;
681        if code >= SHORT_XOR1.len() {
682            return None;
683        }
684        Some(if self.avr_ln1 < 37 {
685            self.short_len1(code)
686        } else {
687            self.short_len2(code)
688        } as usize)
689    }
690
691    fn repeat_last_bit_cost(&self, l_count: u32) -> usize {
692        if l_count == 2 {
693            1
694        } else {
695            self.short_lz_prefix_bit_cost(9)
696                .expect("repeat-last code is encodable")
697        }
698    }
699
700    fn emit_long_lz(&mut self, long_lz: LongLz) -> Result<()> {
701        self.num_huf = 0;
702        self.nlzb += 16;
703        if self.nlzb > 0xff {
704            self.nlzb = 0x90;
705            self.nhfb >>= 1;
706        }
707        let old_avr2 = self.avr_ln2;
708
709        let length_code = self.long_lz_length_code(long_lz).ok_or(Error::InvalidData(
710            "RAR 1.3 LongLZ match length is not encodable for distance",
711        ))?;
712        emit_long_lz_length(&mut self.bits, self.avr_ln2, length_code)?;
713        self.avr_ln2 += length_code;
714        self.avr_ln2 -= self.avr_ln2 >> 5;
715
716        let distance_place = self.long_lz_distance_place(long_lz.distance)?;
717        let (start_pos, dec_tab, pos_tab) = if self.avr_plc_b > 0x28ff {
718            (5, DEC_HF2, POS_HF2)
719        } else if self.avr_plc_b > 0x06ff {
720            (5, DEC_HF1, POS_HF1)
721        } else {
722            (4, DEC_HF0, POS_HF0)
723        };
724        emit_decode_num(
725            &mut self.bits,
726            distance_place as u32,
727            start_pos,
728            dec_tab,
729            pos_tab,
730        )?;
731        self.avr_plc_b += distance_place as u32;
732        self.avr_plc_b -= self.avr_plc_b >> 8;
733
734        let idx = distance_place;
735        let mut distance;
736        let mut new_distance_place;
737        loop {
738            distance = self.ch_set_b[idx] as u32;
739            new_distance_place = self.n_to_pl_b[(distance & 0xff) as usize] as usize;
740            self.n_to_pl_b[(distance & 0xff) as usize] =
741                self.n_to_pl_b[(distance & 0xff) as usize].wrapping_add(1);
742            distance += 1;
743            if distance & 0xff == 0 {
744                corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
745            } else {
746                break;
747            }
748        }
749
750        self.ch_set_b[idx] = self.ch_set_b[new_distance_place];
751        self.ch_set_b[new_distance_place] = distance as u16;
752
753        let low_byte = ((long_lz.distance << 1) & 0xff) as u8;
754        self.bits.write_bits((low_byte >> 1) as u32, 7);
755
756        let old_avr3 = self.avr_ln3;
757        if length_code != 1 && length_code != 4 {
758            if length_code == 0 && long_lz.distance <= self.max_dist3 {
759                self.avr_ln3 += 1;
760                self.avr_ln3 -= self.avr_ln3 >> 8;
761            } else if self.avr_ln3 > 0 {
762                self.avr_ln3 -= 1;
763            }
764        }
765        if old_avr3 > 0xb0 || (self.avr_plc >= 0x2a00 && old_avr2 < 0x40) {
766            self.max_dist3 = 0x7f00;
767        } else {
768            self.max_dist3 = 0x2001;
769        }
770
771        self.remember_match(long_lz.distance, long_lz.length);
772        Ok(())
773    }
774
775    fn long_lz_length_code(&self, long_lz: LongLz) -> Option<u32> {
776        long_lz_length_code_for_distance(long_lz, self.max_dist3)
777    }
778
779    fn long_lz_distance_place(&self, target_distance: u32) -> Result<usize> {
780        let wanted_high = ((target_distance << 1) & 0xff00) as u16;
781        self.ch_set_b
782            .iter()
783            .position(|&value| value & 0xff00 == wanted_high)
784            .ok_or(Error::InvalidData(
785                "RAR 1.3 LongLZ distance is not encodable",
786            ))
787    }
788
789    fn literal_place_bit_cost(&self, place: usize) -> Option<usize> {
790        if self.avr_plc > 0x75ff {
791            decode_num_bit_cost(place as u32, 8, DEC_HF4, POS_HF4)
792        } else if self.avr_plc > 0x5dff {
793            decode_num_bit_cost(place as u32, 6, DEC_HF3, POS_HF3)
794        } else if self.avr_plc > 0x35ff {
795            decode_num_bit_cost(place as u32, 5, DEC_HF2, POS_HF2)
796        } else if self.avr_plc > 0x0dff {
797            decode_num_bit_cost(place as u32, 5, DEC_HF1, POS_HF1)
798        } else {
799            decode_num_bit_cost(place as u32, 4, DEC_HF0, POS_HF0)
800        }
801    }
802
803    fn long_lz_length_bit_cost(&self, length_code: u32) -> Option<usize> {
804        if self.avr_ln2 >= 122 {
805            decode_num_bit_cost(length_code, 3, DEC_L2, POS_L2)
806        } else if self.avr_ln2 >= 64 {
807            decode_num_bit_cost(length_code, 2, DEC_L1, POS_L1)
808        } else if length_code <= 7 {
809            Some(length_code as usize + 1)
810        } else if length_code < 0x100 {
811            Some(16)
812        } else {
813            None
814        }
815    }
816
817    fn long_lz_distance_bit_cost(&self, distance_place: usize) -> Option<usize> {
818        if self.avr_plc_b > 0x28ff {
819            decode_num_bit_cost(distance_place as u32, 5, DEC_HF2, POS_HF2)
820        } else if self.avr_plc_b > 0x06ff {
821            decode_num_bit_cost(distance_place as u32, 5, DEC_HF1, POS_HF1)
822        } else {
823            decode_num_bit_cost(distance_place as u32, 4, DEC_HF0, POS_HF0)
824        }
825    }
826
827    fn emit_stmode_exit(&mut self) -> Result<()> {
828        let (start_pos, dec_tab, pos_tab) = if self.avr_plc > 0x75ff {
829            (8, DEC_HF4, POS_HF4)
830        } else if self.avr_plc > 0x5dff {
831            (6, DEC_HF3, POS_HF3)
832        } else if self.avr_plc > 0x35ff {
833            (5, DEC_HF2, POS_HF2)
834        } else if self.avr_plc > 0x0dff {
835            (5, DEC_HF1, POS_HF1)
836        } else {
837            (4, DEC_HF0, POS_HF0)
838        };
839        emit_decode_num(&mut self.bits, 0, start_pos, dec_tab, pos_tab)?;
840        self.bits.write_bits(1, 1);
841        self.num_huf = 0;
842        Ok(())
843    }
844
845    fn init_huff(&mut self) {
846        for i in 0..256 {
847            self.ch_set[i] = (i as u16) << 8;
848            self.ch_set_c[i] = (0u8.wrapping_sub(i as u8) as u16) << 8;
849            self.ch_set_b[i] = (i as u16) << 8;
850        }
851        self.n_to_pl = [0; 256];
852        self.n_to_pl_b = [0; 256];
853        self.n_to_pl_c = [0; 256];
854        for i in 0..256 {
855            self.ch_set_a[i] = i as u16;
856        }
857        corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
858    }
859
860    fn remember_match(&mut self, distance: u32, length: u32) {
861        self.old_dist[self.old_dist_ptr] = distance;
862        self.old_dist_ptr = (self.old_dist_ptr + 1) & 3;
863        self.last_length = length;
864        self.last_dist = distance;
865    }
866
867    fn short_len1(&self, pos: usize) -> u8 {
868        if pos == 1 {
869            3
870        } else {
871            SHORT_LEN1[pos]
872        }
873    }
874
875    fn short_len2(&self, pos: usize) -> u8 {
876        if pos == 3 {
877            3
878        } else {
879            SHORT_LEN2[pos]
880        }
881    }
882}
883
884impl Default for Unpack15Encoder {
885    fn default() -> Self {
886        Self::new()
887    }
888}
889
890#[derive(Debug, Clone, Copy)]
891struct LzPlanState {
892    last_dist: u32,
893    last_length: u32,
894    old_dist: [u32; 4],
895    old_dist_ptr: usize,
896    max_dist3: u32,
897    nlzb: u32,
898    nhfb: u32,
899    l_count: u32,
900}
901
902#[derive(Debug, Clone, Copy, PartialEq, Eq)]
903enum EncodedToken {
904    Literal(u8),
905    ShortLz(ShortLz),
906    RepeatLast(RepeatLastLz),
907    OldDist(OldDistLz),
908    LongLz(LongLz),
909}
910
911impl EncodedToken {
912    fn length(self) -> u32 {
913        match self {
914            Self::Literal(_) => 1,
915            Self::ShortLz(token) => token.length,
916            Self::RepeatLast(token) => token.length,
917            Self::OldDist(token) => token.length,
918            Self::LongLz(token) => token.length,
919        }
920    }
921
922    fn flag_bits(self, nlzb: u32, nhfb: u32) -> &'static [bool] {
923        match self {
924            Self::Literal(_) => huff_flag_bits(nlzb <= nhfb),
925            Self::LongLz(_) => long_lz_flag_bits(nlzb > nhfb),
926            Self::ShortLz(_) | Self::RepeatLast(_) | Self::OldDist(_) => &[false, false],
927        }
928    }
929}
930
931#[derive(Debug, Clone, Copy, PartialEq, Eq)]
932struct ShortLz {
933    pub distance: u32,
934    pub length: u32,
935}
936
937#[derive(Debug, Clone, Copy, PartialEq, Eq)]
938struct RepeatLastLz {
939    pub distance: u32,
940    pub length: u32,
941}
942
943#[derive(Debug, Clone, Copy, PartialEq, Eq)]
944struct OldDistLz {
945    pub distance: u32,
946    pub length: u32,
947    pub short_code: u32,
948}
949
950#[derive(Debug, Clone, Copy, PartialEq, Eq)]
951pub struct LongLz {
952    pub distance: u32,
953    pub length: u32,
954}
955
956fn huff_flag_bits(prefer_huff_on_one: bool) -> &'static [bool] {
957    if prefer_huff_on_one {
958        &[true]
959    } else {
960        &[false, true]
961    }
962}
963
964fn long_lz_flag_bits(prefer_long_lz_on_one: bool) -> &'static [bool] {
965    if prefer_long_lz_on_one {
966        &[true]
967    } else {
968        &[false, true]
969    }
970}
971
972fn write_planned_flag_bits(flags: &mut u8, start: usize, bits: &[bool]) {
973    for (offset, &bit) in bits.iter().enumerate() {
974        if bit {
975            *flags |= 1 << (7 - start - offset);
976        }
977    }
978}
979
980fn plan_huff_effect(nhfb: &mut u32, nlzb: &mut u32) {
981    *nhfb += 16;
982    if *nhfb > 0xff {
983        *nhfb = 0x90;
984        *nlzb >>= 1;
985    }
986}
987
988fn l_count_break_bit_cost(l_count: u32) -> usize {
989    usize::from(l_count == 2)
990}
991
992fn find_lz_token(
993    input: &[u8],
994    pos: usize,
995    buckets: &[Vec<usize>],
996    state: LzPlanState,
997    options: EncodeOptions,
998) -> Option<EncodedToken> {
999    find_lz_tokens(input, pos, buckets, state, options)
1000        .into_iter()
1001        .next()
1002}
1003
1004fn find_lz_tokens(
1005    input: &[u8],
1006    pos: usize,
1007    buckets: &[Vec<usize>],
1008    state: LzPlanState,
1009    options: EncodeOptions,
1010) -> Vec<EncodedToken> {
1011    let mut tokens = Vec::with_capacity(4);
1012    if let Some(repeat) = find_repeat_last_lz(input, pos, state.last_dist, state.last_length) {
1013        tokens.push(EncodedToken::RepeatLast(repeat));
1014    }
1015    if options.old_distance_tokens {
1016        if let Some(old_lz) = find_old_dist_lz(
1017            input,
1018            pos,
1019            state.old_dist,
1020            state.old_dist_ptr,
1021            state.max_dist3,
1022        ) {
1023            tokens.push(EncodedToken::OldDist(old_lz));
1024        }
1025    }
1026    if let Some(short_lz) = find_short_lz(input, pos) {
1027        tokens.push(EncodedToken::ShortLz(short_lz));
1028    }
1029    if let Some(long_lz) = find_long_lz_with_buckets(
1030        input,
1031        pos,
1032        options.max_long_match_distance,
1033        buckets,
1034        MAX_LONG_MATCH_CANDIDATES,
1035    )
1036    .filter(|long_lz| long_lz_length_code_for_distance(*long_lz, state.max_dist3).is_some())
1037    {
1038        tokens.push(EncodedToken::LongLz(long_lz));
1039    }
1040    tokens
1041}
1042
1043fn should_lazy_emit_literal(
1044    input: &[u8],
1045    pos: usize,
1046    buckets: &[Vec<usize>],
1047    current: EncodedToken,
1048    max_dist3: u32,
1049    options: EncodeOptions,
1050) -> bool {
1051    if !matches!(current, EncodedToken::ShortLz(_) | EncodedToken::LongLz(_))
1052        || pos + 1 >= input.len()
1053    {
1054        return false;
1055    }
1056
1057    let next = find_lz_token(
1058        input,
1059        pos + 1,
1060        buckets,
1061        LzPlanState {
1062            last_dist: u32::MAX,
1063            last_length: 0,
1064            old_dist: [u32::MAX; 4],
1065            old_dist_ptr: 0,
1066            max_dist3,
1067            nlzb: 0,
1068            nhfb: 0,
1069            l_count: 0,
1070        },
1071        options,
1072    );
1073    next.is_some_and(|next| {
1074        matches!(next, EncodedToken::ShortLz(_) | EncodedToken::LongLz(_))
1075            && next.length() >= current.length() + 2
1076    })
1077}
1078
1079fn find_short_lz(input: &[u8], pos: usize) -> Option<ShortLz> {
1080    if pos < 2 {
1081        return None;
1082    }
1083
1084    let max_distance = pos.min(256);
1085    let mut best = ShortLz {
1086        distance: 0,
1087        length: 0,
1088    };
1089    for distance in 1..=max_distance {
1090        let mut length = 0usize;
1091        while length < 10
1092            && pos + length < input.len()
1093            && input[pos + length] == input[pos + length - distance]
1094        {
1095            length += 1;
1096        }
1097        if length >= 3
1098            && (length > best.length as usize
1099                || (length == best.length as usize && distance < best.distance as usize))
1100        {
1101            best = ShortLz {
1102                distance: distance as u32,
1103                length: length as u32,
1104            };
1105        }
1106    }
1107
1108    (best.length >= 3).then_some(best)
1109}
1110
1111fn find_repeat_last_lz(
1112    input: &[u8],
1113    pos: usize,
1114    last_dist: u32,
1115    last_length: u32,
1116) -> Option<RepeatLastLz> {
1117    if last_dist == u32::MAX || last_dist == 0 || last_length == 0 {
1118        return None;
1119    }
1120    let distance = usize::try_from(last_dist).ok()?;
1121    let length = usize::try_from(last_length).ok()?;
1122    if distance > pos || pos.checked_add(length)? > input.len() {
1123        return None;
1124    }
1125    let matches = (0..length).all(|offset| input[pos + offset] == input[pos + offset - distance]);
1126    matches.then_some(RepeatLastLz {
1127        distance: last_dist,
1128        length: last_length,
1129    })
1130}
1131
1132fn find_old_dist_lz(
1133    input: &[u8],
1134    pos: usize,
1135    old_dist: [u32; 4],
1136    old_dist_ptr: usize,
1137    _max_dist3: u32,
1138) -> Option<OldDistLz> {
1139    let mut best = OldDistLz {
1140        distance: 0,
1141        length: 0,
1142        short_code: 0,
1143    };
1144    for short_code in 10..=13 {
1145        let distance = old_dist[(old_dist_ptr.wrapping_sub((short_code - 9) as usize)) & 3];
1146        if distance == u32::MAX || distance == 0 {
1147            continue;
1148        }
1149        let Ok(distance_usize) = usize::try_from(distance) else {
1150            continue;
1151        };
1152        if distance_usize > pos {
1153            continue;
1154        }
1155        let mut length = 0usize;
1156        while length < 258
1157            && pos + length < input.len()
1158            && input[pos + length] == input[pos + length - distance_usize]
1159        {
1160            length += 1;
1161        }
1162        if length >= 3
1163            && old_dist_lz_is_encodable(length as u32, distance, short_code)
1164            && length > best.length as usize
1165        {
1166            best = OldDistLz {
1167                distance,
1168                length: length as u32,
1169                short_code,
1170            };
1171        }
1172    }
1173
1174    (best.length >= 3).then_some(best)
1175}
1176
1177fn old_dist_lz_is_encodable(length: u32, distance: u32, short_code: u32) -> bool {
1178    old_dist_lz_length_code(length, distance, 0x2001, short_code).is_some()
1179        && old_dist_lz_length_code(length, distance, 0x7f00, short_code).is_some()
1180}
1181
1182fn old_dist_lz_length_code(
1183    length: u32,
1184    distance: u32,
1185    max_dist3: u32,
1186    short_code: u32,
1187) -> Option<u32> {
1188    let decoded_bonus = u32::from(distance > 256) + u32::from(distance >= max_dist3);
1189    let length_code = length.checked_sub(2 + decoded_bonus)?;
1190    if short_code == 10 && length_code == 0xff {
1191        return None;
1192    }
1193    Some(length_code)
1194}
1195
1196fn long_lz_length_code_for_distance(long_lz: LongLz, max_dist3: u32) -> Option<u32> {
1197    let decoded_bonus =
1198        u32::from(long_lz.distance >= max_dist3) + u32::from(long_lz.distance <= 256);
1199    long_lz.length.checked_sub(3 + decoded_bonus)
1200}
1201
1202pub fn find_long_lz(input: &[u8], pos: usize, max_match_distance: usize) -> Option<LongLz> {
1203    if pos < 257 {
1204        return None;
1205    }
1206
1207    let max_distance = pos.min(MAX_LONG_LZ_DISTANCE).min(max_match_distance);
1208    if max_distance < 257 {
1209        return None;
1210    }
1211    let mut best = LongLz {
1212        distance: 0,
1213        length: 0,
1214    };
1215    for distance in 257..=max_distance {
1216        let mut length = 0usize;
1217        while length < 258
1218            && pos + length < input.len()
1219            && input[pos + length] == input[pos + length - distance]
1220        {
1221            length += 1;
1222        }
1223        if length >= 3 && length > best.length as usize {
1224            best = LongLz {
1225                distance: distance as u32,
1226                length: length as u32,
1227            };
1228        }
1229    }
1230
1231    (best.length >= 3).then_some(best)
1232}
1233
1234fn find_long_lz_with_buckets(
1235    input: &[u8],
1236    pos: usize,
1237    max_match_distance: usize,
1238    buckets: &[Vec<usize>],
1239    max_candidates: usize,
1240) -> Option<LongLz> {
1241    if pos < 257 || pos + 2 >= input.len() || max_candidates == 0 {
1242        return None;
1243    }
1244
1245    let max_distance = pos.min(MAX_LONG_LZ_DISTANCE).min(max_match_distance);
1246    if max_distance < 257 {
1247        return None;
1248    }
1249    let max_length = (input.len() - pos).min(258);
1250    let mut best = LongLz {
1251        distance: 0,
1252        length: 0,
1253    };
1254    let mut checked = 0usize;
1255    for &candidate in buckets[match_hash(input, pos)].iter().rev() {
1256        if candidate >= pos {
1257            continue;
1258        }
1259        let distance = pos - candidate;
1260        if distance > max_distance {
1261            break;
1262        }
1263        if distance < 257 {
1264            continue;
1265        }
1266        checked += 1;
1267        let mut length = 0usize;
1268        while length < max_length && input[pos + length] == input[pos + length - distance] {
1269            length += 1;
1270        }
1271        if length >= 3
1272            && (length > best.length as usize
1273                || (length == best.length as usize && distance < best.distance as usize))
1274        {
1275            best = LongLz {
1276                distance: distance as u32,
1277                length: length as u32,
1278            };
1279            if length == max_length {
1280                break;
1281            }
1282        }
1283        if checked >= max_candidates {
1284            break;
1285        }
1286    }
1287
1288    (best.length >= 3).then_some(best)
1289}
1290
1291fn long_lz_buckets(input: &[u8]) -> Vec<Vec<usize>> {
1292    let mut buckets = vec![Vec::new(); MATCH_HASH_BUCKETS];
1293    for pos in 0..input.len().saturating_sub(2) {
1294        buckets[match_hash(input, pos)].push(pos);
1295    }
1296    buckets
1297}
1298
1299fn match_hash(input: &[u8], pos: usize) -> usize {
1300    let value =
1301        ((input[pos] as usize) << 8) ^ ((input[pos + 1] as usize) << 4) ^ input[pos + 2] as usize;
1302    value & (MATCH_HASH_BUCKETS - 1)
1303}
1304
1305fn emit_long_lz_length(bits: &mut BitWriter, avr_ln2: u32, length_code: u32) -> Result<()> {
1306    if avr_ln2 >= 122 {
1307        return emit_decode_num(bits, length_code, 3, DEC_L2, POS_L2);
1308    }
1309    if avr_ln2 >= 64 {
1310        return emit_decode_num(bits, length_code, 2, DEC_L1, POS_L1);
1311    }
1312    if length_code <= 7 {
1313        bits.write_bits(1, length_code as usize + 1);
1314        return Ok(());
1315    }
1316    if length_code < 0x100 {
1317        bits.write_bits(length_code, 16);
1318        return Ok(());
1319    }
1320    Err(Error::InvalidData(
1321        "RAR 1.3 LongLZ encoder length is not encodable",
1322    ))
1323}
1324
1325fn emit_decode_num(
1326    bits: &mut BitWriter,
1327    target: u32,
1328    start_pos: u32,
1329    dec_tab: &[u16],
1330    pos_tab: &[u16],
1331) -> Result<()> {
1332    if let Some((code, len)) = encode_decode_num_prefix(target, start_pos, dec_tab, pos_tab) {
1333        bits.write_bits(code, len);
1334        return Ok(());
1335    }
1336    Err(Error::InvalidData(
1337        "RAR 1.3 DecodeNum value is not encodable",
1338    ))
1339}
1340
1341fn decode_num_bit_cost(
1342    target: u32,
1343    start_pos: u32,
1344    dec_tab: &[u16],
1345    pos_tab: &[u16],
1346) -> Option<usize> {
1347    encode_decode_num_prefix(target, start_pos, dec_tab, pos_tab).map(|(_, len)| len)
1348}
1349
1350fn encode_decode_num_prefix(
1351    target: u32,
1352    start_pos: u32,
1353    dec_tab: &[u16],
1354    pos_tab: &[u16],
1355) -> Option<(u32, usize)> {
1356    let end = 16.min(pos_tab.len().saturating_sub(1));
1357    for (len, &base) in pos_tab
1358        .iter()
1359        .enumerate()
1360        .take(end + 1)
1361        .skip(start_pos as usize)
1362    {
1363        let dec_index = len.checked_sub(start_pos as usize)?;
1364        let upper = u32::from(*dec_tab.get(dec_index)?);
1365        let previous = if dec_index == 0 {
1366            0
1367        } else {
1368            u32::from(dec_tab[dec_index - 1])
1369        };
1370        let max_num = upper.checked_sub(1)? & !0xf;
1371        if max_num < previous {
1372            continue;
1373        }
1374        let base = u32::from(base);
1375        let max_target = ((max_num - previous) >> (16 - len)) + base;
1376        if target >= base && target <= max_target {
1377            let num = previous + ((target - base) << (16 - len));
1378            return Some((num >> (16 - len), len));
1379        }
1380    }
1381    None
1382}
1383
1384#[cfg(test)]
1385fn decode_num_prefix_is_stable(
1386    code: u32,
1387    len: usize,
1388    target: u32,
1389    start_pos: u32,
1390    dec_tab: &[u16],
1391    pos_tab: &[u16],
1392) -> bool {
1393    let relevant_tail_bits = 16usize.saturating_sub(len + 4);
1394    for tail in 0..(1u32 << relevant_tail_bits) {
1395        let bit_field = (code << (16 - len)) | (tail << 4);
1396        let (decoded, consumed) = simulate_decode_num(bit_field, start_pos, dec_tab, pos_tab);
1397        if decoded != target || consumed != len {
1398            return false;
1399        }
1400    }
1401    true
1402}
1403
1404#[cfg(test)]
1405fn simulate_decode_num(
1406    bit_field: u32,
1407    mut start_pos: u32,
1408    dec_tab: &[u16],
1409    pos_tab: &[u16],
1410) -> (u32, usize) {
1411    let num = bit_field & 0xfff0;
1412    let mut i = 0usize;
1413    while dec_tab[i] as u32 <= num {
1414        start_pos += 1;
1415        i += 1;
1416    }
1417    (
1418        ((num - if i > 0 { dec_tab[i - 1] as u32 } else { 0 }) >> (16 - start_pos))
1419            + pos_tab[start_pos as usize] as u32,
1420        start_pos as usize,
1421    )
1422}
1423
1424#[derive(Clone)]
1425pub struct Unpack15 {
1426    bits: BitReader,
1427    target: usize,
1428    output_written: usize,
1429    window: [u8; 0x10000],
1430    unp_ptr: usize,
1431    prev_ptr: usize,
1432    first_win_done: bool,
1433    // State names follow RAR13_FORMAT_SPECIFICATION.md ยง6 so the codec state
1434    // lines up directly with the documented Unpack15 tables and traces.
1435    ch_set: [u16; 256],
1436    ch_set_a: [u16; 256],
1437    ch_set_b: [u16; 256],
1438    ch_set_c: [u16; 256],
1439    n_to_pl: [u8; 256],
1440    n_to_pl_b: [u8; 256],
1441    n_to_pl_c: [u8; 256],
1442    avr_plc: u32,
1443    avr_plc_b: u32,
1444    avr_ln1: u32,
1445    avr_ln2: u32,
1446    avr_ln3: u32,
1447    max_dist3: u32,
1448    nhfb: u32,
1449    nlzb: u32,
1450    num_huf: u32,
1451    buf60: u32,
1452    st_mode: bool,
1453    l_count: u32,
1454    flag_buf: u32,
1455    flags_cnt: i32,
1456    old_dist: [u32; 4],
1457    old_dist_ptr: usize,
1458    last_dist: u32,
1459    last_length: u32,
1460}
1461
1462impl Unpack15 {
1463    pub fn new() -> Self {
1464        Self {
1465            bits: BitReader::new(&[]),
1466            target: 0,
1467            output_written: 0,
1468            window: [0; 0x10000],
1469            unp_ptr: 0,
1470            prev_ptr: 0,
1471            first_win_done: false,
1472            ch_set: [0; 256],
1473            ch_set_a: [0; 256],
1474            ch_set_b: [0; 256],
1475            ch_set_c: [0; 256],
1476            n_to_pl: [0; 256],
1477            n_to_pl_b: [0; 256],
1478            n_to_pl_c: [0; 256],
1479            avr_plc: 0x3500,
1480            avr_plc_b: 0,
1481            avr_ln1: 0,
1482            avr_ln2: 0,
1483            avr_ln3: 0,
1484            max_dist3: 0x2001,
1485            nhfb: 0x80,
1486            nlzb: 0x80,
1487            num_huf: 0,
1488            buf60: 0,
1489            st_mode: false,
1490            l_count: 0,
1491            flag_buf: 0,
1492            flags_cnt: 0,
1493            old_dist: [u32::MAX; 4],
1494            old_dist_ptr: 0,
1495            last_dist: u32::MAX,
1496            last_length: 0,
1497        }
1498    }
1499
1500    pub fn decode_member(&mut self, input: &[u8], target: usize, solid: bool) -> Result<Vec<u8>> {
1501        let mut output = Vec::with_capacity(target);
1502        self.decode_member_to(input, target, solid, &mut output)?;
1503        Ok(output)
1504    }
1505
1506    pub fn decode_member_to(
1507        &mut self,
1508        input: &[u8],
1509        target: usize,
1510        solid: bool,
1511        out: &mut impl Write,
1512    ) -> Result<()> {
1513        self.init_member(target, solid);
1514        self.bits = BitReader::new_final(input);
1515        self.decode_loop(out).map_err(|error| match error {
1516            Error::NeedMoreInput => Error::InvalidData("RAR 1.3 bitstream is truncated"),
1517            error => error,
1518        })
1519    }
1520
1521    pub fn decode_member_from_reader(
1522        &mut self,
1523        input: &mut impl Read,
1524        target: usize,
1525        solid: bool,
1526        out: &mut impl Write,
1527    ) -> Result<()> {
1528        const OUTPUT_CHUNK: usize = 64 * 1024;
1529
1530        self.init_member(target, solid);
1531        self.bits = BitReader::new(&[]);
1532        let mut packed = Vec::new();
1533        input
1534            .read_to_end(&mut packed)
1535            .map_err(|_| Error::InvalidData("RAR 1.3 input read failed"))?;
1536        self.bits.append(&packed);
1537        self.bits.finish();
1538
1539        while self.output_written < self.target {
1540            let chunk_target = self
1541                .output_written
1542                .saturating_add(OUTPUT_CHUNK)
1543                .min(self.target);
1544            let mut chunk = Vec::with_capacity(chunk_target - self.output_written);
1545            self.decode_loop_until(chunk_target, &mut chunk)
1546                .map_err(|error| match error {
1547                    Error::NeedMoreInput => Error::InvalidData("RAR 1.3 bitstream is truncated"),
1548                    error => error,
1549                })?;
1550            out.write_all(&chunk)
1551                .map_err(|_| Error::InvalidData("RAR 1.3 output write failed"))?;
1552        }
1553        Ok(())
1554    }
1555
1556    fn init_member(&mut self, target: usize, solid: bool) {
1557        self.target = target;
1558        self.output_written = 0;
1559        self.flags_cnt = -2;
1560        self.flag_buf = 0;
1561        self.st_mode = false;
1562        self.l_count = 0;
1563
1564        if !solid {
1565            self.reset_non_solid();
1566        }
1567    }
1568
1569    fn decode_loop(&mut self, out: &mut impl Write) -> Result<()> {
1570        if self.target == 0 {
1571            return Ok(());
1572        }
1573
1574        self.decode_loop_until(self.target, out)
1575    }
1576
1577    fn decode_loop_until(&mut self, target: usize, out: &mut impl Write) -> Result<()> {
1578        while self.output_written < target {
1579            self.decode_step(out)?;
1580        }
1581
1582        Ok(())
1583    }
1584
1585    fn decode_step(&mut self, out: &mut impl Write) -> Result<()> {
1586        if self.flags_cnt == -2 {
1587            self.get_flags_buf()?;
1588            self.flags_cnt = 8;
1589        }
1590
1591        self.unp_ptr &= 0xffff;
1592        self.first_win_done |= self.prev_ptr > self.unp_ptr;
1593        self.prev_ptr = self.unp_ptr;
1594
1595        if self.st_mode {
1596            return self.huff_decode(out);
1597        }
1598
1599        self.flags_cnt -= 1;
1600        if self.flags_cnt < 0 {
1601            self.get_flags_buf()?;
1602            self.flags_cnt = 7;
1603        }
1604
1605        if self.flag_buf & 0x80 != 0 {
1606            self.flag_buf = (self.flag_buf << 1) & 0xff;
1607            if self.nlzb > self.nhfb {
1608                self.long_lz(out)
1609            } else {
1610                self.huff_decode(out)
1611            }
1612        } else {
1613            self.flag_buf = (self.flag_buf << 1) & 0xff;
1614            self.flags_cnt -= 1;
1615            if self.flags_cnt < 0 {
1616                self.get_flags_buf()?;
1617                self.flags_cnt = 7;
1618            }
1619            if self.flag_buf & 0x80 != 0 {
1620                self.flag_buf = (self.flag_buf << 1) & 0xff;
1621                if self.nlzb > self.nhfb {
1622                    self.huff_decode(out)
1623                } else {
1624                    self.long_lz(out)
1625                }
1626            } else {
1627                self.flag_buf = (self.flag_buf << 1) & 0xff;
1628                self.short_lz(out)
1629            }
1630        }
1631    }
1632
1633    fn reset_non_solid(&mut self) {
1634        self.window = [0; 0x10000];
1635        self.unp_ptr = 0;
1636        self.prev_ptr = 0;
1637        self.first_win_done = false;
1638        self.avr_plc_b = 0;
1639        self.avr_ln1 = 0;
1640        self.avr_ln2 = 0;
1641        self.avr_ln3 = 0;
1642        self.num_huf = 0;
1643        self.buf60 = 0;
1644        self.avr_plc = 0x3500;
1645        self.max_dist3 = 0x2001;
1646        self.nhfb = 0x80;
1647        self.nlzb = 0x80;
1648        self.old_dist = [u32::MAX; 4];
1649        self.old_dist_ptr = 0;
1650        self.last_dist = u32::MAX;
1651        self.last_length = 0;
1652        self.init_huff();
1653    }
1654
1655    fn short_lz(&mut self, out: &mut impl Write) -> Result<()> {
1656        self.num_huf = 0;
1657        let mut bit_field = self.bits.get_bits()?;
1658        if self.l_count == 2 {
1659            self.bits.add_bits(1);
1660            if bit_field >= 0x8000 {
1661                self.copy_string(self.last_dist, self.last_length, out)?;
1662                return Ok(());
1663            }
1664            bit_field = (bit_field << 1) & 0xffff;
1665            self.l_count = 0;
1666        }
1667
1668        let bit_byte = (bit_field >> 8) as u8;
1669        let mut length = 0usize;
1670        if self.avr_ln1 < 37 {
1671            while length < SHORT_XOR1.len() {
1672                let short_len = self.short_len1(length);
1673                let mask = (!(0xffu16 >> short_len)) as u8;
1674                if ((bit_byte ^ SHORT_XOR1[length]) & mask) == 0 {
1675                    break;
1676                }
1677                length += 1;
1678            }
1679            self.bits.add_bits(self.short_len1(length) as usize);
1680        } else {
1681            while length < SHORT_XOR2.len() {
1682                let short_len = self.short_len2(length);
1683                let mask = (!(0xffu16 >> short_len)) as u8;
1684                if ((bit_byte ^ SHORT_XOR2[length]) & mask) == 0 {
1685                    break;
1686                }
1687                length += 1;
1688            }
1689            self.bits.add_bits(self.short_len2(length) as usize);
1690        }
1691
1692        let mut length = length as u32;
1693        if length >= 9 {
1694            if length == 9 {
1695                self.l_count += 1;
1696                self.copy_string(self.last_dist, self.last_length, out)?;
1697                return Ok(());
1698            }
1699            if length == 14 {
1700                self.l_count = 0;
1701                length = self.decode_num(self.bits.get_bits()?, 3, DEC_L2, POS_L2) + 5;
1702                let distance = (self.bits.get_bits()? >> 1) | 0x8000;
1703                self.bits.add_bits(15);
1704                self.last_length = length;
1705                self.last_dist = distance;
1706                self.copy_string(distance, length, out)?;
1707                return Ok(());
1708            }
1709
1710            self.l_count = 0;
1711            let save_length = length;
1712            let distance =
1713                self.old_dist[(self.old_dist_ptr.wrapping_sub((length - 9) as usize)) & 3];
1714            length = self.decode_num(self.bits.get_bits()?, 2, DEC_L1, POS_L1) + 2;
1715            if length == 0x101 && save_length == 10 {
1716                self.buf60 ^= 1;
1717                return Ok(());
1718            }
1719            if distance > 256 {
1720                length += 1;
1721            }
1722            if distance >= self.max_dist3 {
1723                length += 1;
1724            }
1725
1726            self.remember_match(distance, length);
1727            self.copy_string(distance, length, out)?;
1728            return Ok(());
1729        }
1730
1731        self.l_count = 0;
1732        self.avr_ln1 += length;
1733        self.avr_ln1 -= self.avr_ln1 >> 4;
1734
1735        let distance_place =
1736            (self.decode_num(self.bits.get_bits()?, 5, DEC_HF2, POS_HF2) & 0xff) as usize;
1737        let mut distance = self.ch_set_a[distance_place] as u32;
1738        if distance_place > 0 {
1739            let last_distance = self.ch_set_a[distance_place - 1];
1740            self.ch_set_a[distance_place] = last_distance;
1741            self.ch_set_a[distance_place - 1] = distance as u16;
1742        }
1743        length += 2;
1744        distance += 1;
1745        self.remember_match(distance, length);
1746        self.copy_string(distance, length, out)
1747    }
1748
1749    fn long_lz(&mut self, out: &mut impl Write) -> Result<()> {
1750        self.num_huf = 0;
1751        self.nlzb += 16;
1752        if self.nlzb > 0xff {
1753            self.nlzb = 0x90;
1754            self.nhfb >>= 1;
1755        }
1756        let old_avr2 = self.avr_ln2;
1757
1758        let bit_field = self.bits.get_bits()?;
1759        let mut length = if self.avr_ln2 >= 122 {
1760            self.decode_num(bit_field, 3, DEC_L2, POS_L2)
1761        } else if self.avr_ln2 >= 64 {
1762            self.decode_num(bit_field, 2, DEC_L1, POS_L1)
1763        } else if bit_field < 0x100 {
1764            self.bits.add_bits(16);
1765            bit_field
1766        } else {
1767            let mut length = 0u32;
1768            while ((bit_field << length) & 0x8000) == 0 {
1769                length += 1;
1770            }
1771            self.bits.add_bits((length + 1) as usize);
1772            length
1773        };
1774
1775        self.avr_ln2 += length;
1776        self.avr_ln2 -= self.avr_ln2 >> 5;
1777
1778        let bit_field = self.bits.get_bits()?;
1779        let distance_place = if self.avr_plc_b > 0x28ff {
1780            self.decode_num(bit_field, 5, DEC_HF2, POS_HF2)
1781        } else if self.avr_plc_b > 0x06ff {
1782            self.decode_num(bit_field, 5, DEC_HF1, POS_HF1)
1783        } else {
1784            self.decode_num(bit_field, 4, DEC_HF0, POS_HF0)
1785        };
1786
1787        self.avr_plc_b += distance_place;
1788        self.avr_plc_b -= self.avr_plc_b >> 8;
1789
1790        let idx = (distance_place & 0xff) as usize;
1791        let mut distance;
1792        let mut new_distance_place;
1793        loop {
1794            distance = self.ch_set_b[idx] as u32;
1795            new_distance_place = self.n_to_pl_b[(distance & 0xff) as usize] as usize;
1796            self.n_to_pl_b[(distance & 0xff) as usize] =
1797                self.n_to_pl_b[(distance & 0xff) as usize].wrapping_add(1);
1798            distance += 1;
1799            if distance & 0xff == 0 {
1800                corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
1801            } else {
1802                break;
1803            }
1804        }
1805
1806        self.ch_set_b[idx] = self.ch_set_b[new_distance_place];
1807        self.ch_set_b[new_distance_place] = distance as u16;
1808
1809        distance = ((distance & 0xff00) | (self.bits.get_bits()? >> 8)) >> 1;
1810        self.bits.add_bits(7);
1811
1812        let old_avr3 = self.avr_ln3;
1813        if length != 1 && length != 4 {
1814            if length == 0 && distance <= self.max_dist3 {
1815                self.avr_ln3 += 1;
1816                self.avr_ln3 -= self.avr_ln3 >> 8;
1817            } else if self.avr_ln3 > 0 {
1818                self.avr_ln3 -= 1;
1819            }
1820        }
1821        length += 3;
1822        if distance >= self.max_dist3 {
1823            length += 1;
1824        }
1825        if distance <= 256 {
1826            length += 8;
1827        }
1828        if old_avr3 > 0xb0 || (self.avr_plc >= 0x2a00 && old_avr2 < 0x40) {
1829            self.max_dist3 = 0x7f00;
1830        } else {
1831            self.max_dist3 = 0x2001;
1832        }
1833
1834        self.remember_match(distance, length);
1835        self.copy_string(distance, length, out)
1836    }
1837
1838    fn huff_decode(&mut self, out: &mut impl Write) -> Result<()> {
1839        let bit_field = self.bits.get_bits()?;
1840
1841        let mut byte_place = if self.avr_plc > 0x75ff {
1842            self.decode_num(bit_field, 8, DEC_HF4, POS_HF4)
1843        } else if self.avr_plc > 0x5dff {
1844            self.decode_num(bit_field, 6, DEC_HF3, POS_HF3)
1845        } else if self.avr_plc > 0x35ff {
1846            self.decode_num(bit_field, 5, DEC_HF2, POS_HF2)
1847        } else if self.avr_plc > 0x0dff {
1848            self.decode_num(bit_field, 5, DEC_HF1, POS_HF1)
1849        } else {
1850            self.decode_num(bit_field, 4, DEC_HF0, POS_HF0)
1851        } & 0xff;
1852
1853        if self.st_mode {
1854            if byte_place == 0 && bit_field > 0x0fff {
1855                byte_place = 0x100;
1856            }
1857            if byte_place == 0 {
1858                let bit_field = self.bits.get_bits()?;
1859                self.bits.add_bits(1);
1860                if bit_field & 0x8000 != 0 {
1861                    self.num_huf = 0;
1862                    self.st_mode = false;
1863                    return Ok(());
1864                }
1865
1866                let length = if bit_field & 0x4000 != 0 { 4 } else { 3 };
1867                self.bits.add_bits(1);
1868                let mut distance = self.decode_num(self.bits.get_bits()?, 5, DEC_HF2, POS_HF2);
1869                distance = (distance << 5) | (self.bits.get_bits()? >> 11);
1870                self.bits.add_bits(5);
1871                self.copy_string(distance, length, out)?;
1872                return Ok(());
1873            }
1874            byte_place -= 1;
1875        } else {
1876            if self.num_huf >= 16 && self.flags_cnt == 0 {
1877                self.st_mode = true;
1878            }
1879            self.num_huf += 1;
1880        }
1881
1882        self.avr_plc += byte_place;
1883        self.avr_plc -= self.avr_plc >> 8;
1884        self.nhfb += 16;
1885        if self.nhfb > 0xff {
1886            self.nhfb = 0x90;
1887            self.nlzb >>= 1;
1888        }
1889
1890        let byte = (self.ch_set[byte_place as usize] >> 8) as u8;
1891        self.put_byte(byte, out)?;
1892
1893        let idx = byte_place as usize;
1894        let mut cur_byte;
1895        let mut new_byte_place;
1896        loop {
1897            cur_byte = self.ch_set[idx] as u32;
1898            new_byte_place = self.n_to_pl[(cur_byte & 0xff) as usize] as usize;
1899            self.n_to_pl[(cur_byte & 0xff) as usize] =
1900                self.n_to_pl[(cur_byte & 0xff) as usize].wrapping_add(1);
1901            cur_byte += 1;
1902            if cur_byte & 0xff > 0xa1 {
1903                corr_huff(&mut self.ch_set, &mut self.n_to_pl);
1904            } else {
1905                break;
1906            }
1907        }
1908
1909        self.ch_set[idx] = self.ch_set[new_byte_place];
1910        self.ch_set[new_byte_place] = cur_byte as u16;
1911        Ok(())
1912    }
1913
1914    fn get_flags_buf(&mut self) -> Result<()> {
1915        let flags_place = self.decode_num(self.bits.get_bits()?, 5, DEC_HF2, POS_HF2) as usize;
1916        if flags_place >= self.ch_set_c.len() {
1917            return Ok(());
1918        }
1919
1920        let mut flags;
1921        let mut new_flags_place;
1922        loop {
1923            flags = self.ch_set_c[flags_place] as u32;
1924            new_flags_place = self.n_to_pl_c[(flags & 0xff) as usize] as usize;
1925            self.n_to_pl_c[(flags & 0xff) as usize] =
1926                self.n_to_pl_c[(flags & 0xff) as usize].wrapping_add(1);
1927            self.flag_buf = flags >> 8;
1928            flags += 1;
1929            if flags & 0xff == 0 {
1930                corr_huff(&mut self.ch_set_c, &mut self.n_to_pl_c);
1931            } else {
1932                break;
1933            }
1934        }
1935
1936        self.ch_set_c[flags_place] = self.ch_set_c[new_flags_place];
1937        self.ch_set_c[new_flags_place] = flags as u16;
1938        Ok(())
1939    }
1940
1941    fn decode_num(
1942        &mut self,
1943        num: u32,
1944        mut start_pos: u32,
1945        dec_tab: &[u16],
1946        pos_tab: &[u16],
1947    ) -> u32 {
1948        let num = num & 0xfff0;
1949        let mut i = 0usize;
1950        while dec_tab[i] as u32 <= num {
1951            start_pos += 1;
1952            i += 1;
1953        }
1954        self.bits.add_bits(start_pos as usize);
1955        ((num - if i > 0 { dec_tab[i - 1] as u32 } else { 0 }) >> (16 - start_pos))
1956            + pos_tab[start_pos as usize] as u32
1957    }
1958
1959    fn copy_string(&mut self, distance: u32, length: u32, out: &mut impl Write) -> Result<()> {
1960        if self.output_written + length as usize > self.target {
1961            return Err(Error::InvalidData("RAR 1.3 match exceeds output size"));
1962        }
1963
1964        if (!self.first_win_done && distance as usize > self.unp_ptr)
1965            || distance as usize > 0x10000
1966            || distance == 0
1967        {
1968            for _ in 0..length {
1969                self.put_byte(0, out)?;
1970            }
1971        } else {
1972            for _ in 0..length {
1973                let byte = self.window[(self.unp_ptr.wrapping_sub(distance as usize)) & 0xffff];
1974                self.put_byte(byte, out)?;
1975            }
1976        }
1977        Ok(())
1978    }
1979
1980    fn put_byte(&mut self, byte: u8, out: &mut impl Write) -> Result<()> {
1981        if self.output_written >= self.target {
1982            return Err(Error::InvalidData("RAR 1.3 literal exceeds output size"));
1983        }
1984        self.window[self.unp_ptr] = byte;
1985        self.unp_ptr = (self.unp_ptr + 1) & 0xffff;
1986        out.write_all(&[byte])
1987            .map_err(|_| Error::InvalidData("RAR 1.3 output write failed"))?;
1988        self.output_written += 1;
1989        Ok(())
1990    }
1991
1992    fn remember_match(&mut self, distance: u32, length: u32) {
1993        self.old_dist[self.old_dist_ptr] = distance;
1994        self.old_dist_ptr = (self.old_dist_ptr + 1) & 3;
1995        self.last_length = length;
1996        self.last_dist = distance;
1997    }
1998
1999    fn short_len1(&self, pos: usize) -> u32 {
2000        if pos == 1 {
2001            self.buf60 + 3
2002        } else {
2003            SHORT_LEN1[pos] as u32
2004        }
2005    }
2006
2007    fn short_len2(&self, pos: usize) -> u32 {
2008        if pos == 3 {
2009            self.buf60 + 3
2010        } else {
2011            SHORT_LEN2[pos] as u32
2012        }
2013    }
2014
2015    fn init_huff(&mut self) {
2016        for i in 0..256 {
2017            self.ch_set[i] = (i as u16) << 8;
2018            self.ch_set_b[i] = (i as u16) << 8;
2019            self.ch_set_a[i] = i as u16;
2020            self.ch_set_c[i] = (0u8.wrapping_sub(i as u8) as u16) << 8;
2021        }
2022        self.n_to_pl = [0; 256];
2023        self.n_to_pl_b = [0; 256];
2024        self.n_to_pl_c = [0; 256];
2025        corr_huff(&mut self.ch_set_b, &mut self.n_to_pl_b);
2026    }
2027}
2028
2029impl Default for Unpack15 {
2030    fn default() -> Self {
2031        Self::new()
2032    }
2033}
2034
2035fn corr_huff(char_set: &mut [u16; 256], num_to_place: &mut [u8; 256]) {
2036    let mut pos = 0usize;
2037    for rank in (0..=7).rev() {
2038        for _ in 0..32 {
2039            char_set[pos] = (char_set[pos] & !0xff) | rank;
2040            pos += 1;
2041        }
2042    }
2043    *num_to_place = [0; 256];
2044    for rank in (0..=6).rev() {
2045        num_to_place[rank] = ((7 - rank) * 32) as u8;
2046    }
2047}
2048
2049#[cfg(test)]
2050mod tests {
2051    use super::{
2052        decode_num_bit_cost, decode_num_prefix_is_stable, find_long_lz, find_lz_token,
2053        find_old_dist_lz, long_lz_buckets, should_lazy_emit_literal, unpack15_decode,
2054        unpack15_encode, EncodeOptions, EncodedToken, LongLz, LzPlanState, OldDistLz, ShortLz,
2055        Unpack15, Unpack15Encoder, DEC_HF0, DEC_HF1, DEC_HF2, DEC_HF3, DEC_HF4, DEC_L1, DEC_L2,
2056        POS_HF0, POS_HF1, POS_HF2, POS_HF3, POS_HF4, POS_L1, POS_L2,
2057    };
2058
2059    fn brute_decode_num_bit_cost(
2060        target: u32,
2061        start_pos: u32,
2062        dec_tab: &[u16],
2063        pos_tab: &[u16],
2064    ) -> Option<usize> {
2065        for len in start_pos as usize..=16 {
2066            for code in 0..(1u32 << len) {
2067                if decode_num_prefix_is_stable(code, len, target, start_pos, dec_tab, pos_tab) {
2068                    return Some(len);
2069                }
2070            }
2071        }
2072        None
2073    }
2074
2075    #[test]
2076    fn decode_member_from_reader_accepts_incremental_input() {
2077        struct TinyReader<'a> {
2078            input: &'a [u8],
2079        }
2080
2081        impl std::io::Read for TinyReader<'_> {
2082            fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
2083                if self.input.is_empty() {
2084                    return Ok(0);
2085                }
2086                let len = self.input.len().min(out.len()).min(2);
2087                out[..len].copy_from_slice(&self.input[..len]);
2088                self.input = &self.input[len..];
2089                Ok(len)
2090            }
2091        }
2092
2093        let expected = b"RAR 1.4 incremental input fixture\n".repeat(32);
2094        let packed = unpack15_encode(&expected).unwrap();
2095        assert_eq!(unpack15_decode(&packed, expected.len()).unwrap(), expected);
2096
2097        let mut reader = TinyReader { input: &packed };
2098        let mut decoder = Unpack15::new();
2099        let mut output = Vec::new();
2100        decoder
2101            .decode_member_from_reader(&mut reader, expected.len(), false, &mut output)
2102            .unwrap();
2103
2104        assert_eq!(output, expected);
2105    }
2106
2107    #[test]
2108    fn decode_num_bit_cost_matches_prefix_search_at_table_boundaries() {
2109        let tables = [
2110            (4, DEC_HF0, POS_HF0),
2111            (5, DEC_HF1, POS_HF1),
2112            (5, DEC_HF2, POS_HF2),
2113            (6, DEC_HF3, POS_HF3),
2114            (8, DEC_HF4, POS_HF4),
2115            (2, DEC_L1, POS_L1),
2116            (3, DEC_L2, POS_L2),
2117        ];
2118        let targets = [0, 1, 2, 3, 7, 8, 16, 24, 32, 33, 53, 117, 233, 255, 256];
2119
2120        for (start_pos, dec_tab, pos_tab) in tables {
2121            for target in targets {
2122                assert_eq!(
2123                    decode_num_bit_cost(target, start_pos, dec_tab, pos_tab),
2124                    brute_decode_num_bit_cost(target, start_pos, dec_tab, pos_tab),
2125                    "target {target}, start_pos {start_pos}"
2126                );
2127            }
2128        }
2129    }
2130
2131    #[test]
2132    fn encoder_emits_rar15_very_long_lz_matches() {
2133        let mut input: Vec<_> = (0u8..=255).cycle().take(300).collect();
2134        input.extend_from_within(..258);
2135
2136        assert_eq!(
2137            find_long_lz(&input, 300, 0x8000),
2138            Some(LongLz {
2139                distance: 300,
2140                length: 258
2141            })
2142        );
2143        let packed = unpack15_encode(&input).unwrap();
2144
2145        assert!(
2146            packed.len() < 330,
2147            "very-long LongLZ should encode a 258-byte repeat compactly, got {} bytes",
2148            packed.len()
2149        );
2150        assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2151    }
2152
2153    #[test]
2154    fn encoder_adjusts_rar15_long_lz_length_for_far_distance_bonus() {
2155        let mut input: Vec<_> = (0..9000).map(|index| (index * 73 + 19) as u8).collect();
2156        input.extend_from_within(..10);
2157
2158        let packed = unpack15_encode(&input).unwrap();
2159
2160        assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2161    }
2162
2163    #[test]
2164    fn encoder_reuses_rar15_repeat_last_token() {
2165        let input = b"abcdefghijklmnop".repeat(64);
2166        let packed = unpack15_encode(&input).unwrap();
2167
2168        assert!(
2169            packed.len() < 100,
2170            "repeat-last tokens should keep a simple repeated pattern compact, got {} bytes",
2171            packed.len()
2172        );
2173        assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2174    }
2175
2176    #[test]
2177    fn old_distance_finder_maps_ring_entries_to_short_lz_codes() {
2178        let mut input: Vec<_> = (0..80).map(|index| (index * 37 + 11) as u8).collect();
2179        let pos = input.len();
2180        input.extend_from_within(pos - 33..pos - 13);
2181
2182        assert_eq!(
2183            find_old_dist_lz(&input, pos, [11, 22, 33, 44], 0, 0x2001),
2184            Some(OldDistLz {
2185                distance: 33,
2186                length: 20,
2187                short_code: 11,
2188            })
2189        );
2190    }
2191
2192    #[test]
2193    fn old_distance_finder_rejects_buf60_toggle_encoding() {
2194        let mut input = b"abcd".repeat(128);
2195        let pos = input.len();
2196        input.extend((0..257).map(|index| b"abcd"[index % 4]));
2197
2198        assert_eq!(
2199            find_old_dist_lz(&input, pos, [u32::MAX, 4, u32::MAX, u32::MAX], 2, 0x2001),
2200            None,
2201            "short-code 10 with decoded base length 0x101 is the Buf60 toggle, not a match"
2202        );
2203    }
2204
2205    #[test]
2206    fn planner_emits_safe_old_distance_token() {
2207        let mut input: Vec<_> = (0..80).map(|index| (index * 37 + 11) as u8).collect();
2208        let pos = input.len();
2209        input.extend_from_within(pos - 33..pos - 13);
2210
2211        let encoder = Unpack15Encoder::new();
2212        let buckets = long_lz_buckets(&input);
2213        let token = encoder
2214            .choose_lz_token(
2215                &input,
2216                pos,
2217                &buckets,
2218                LzPlanState {
2219                    last_dist: u32::MAX,
2220                    last_length: 0,
2221                    old_dist: [11, 22, 33, 44],
2222                    old_dist_ptr: 0,
2223                    max_dist3: 0x2001,
2224                    nlzb: encoder.nlzb,
2225                    nhfb: encoder.nhfb,
2226                    l_count: encoder.l_count,
2227                },
2228                0,
2229            )
2230            .expect("old-distance candidate should be selected");
2231
2232        assert_eq!(
2233            token,
2234            EncodedToken::OldDist(OldDistLz {
2235                distance: 33,
2236                length: 20,
2237                short_code: 11,
2238            })
2239        );
2240    }
2241
2242    #[test]
2243    fn encoder_exits_stmode_when_literal_runs_trigger_decoder_mode() {
2244        let input: Vec<_> = (0..96).map(|index| (index * 73 + 19) as u8).collect();
2245        let packed = unpack15_encode(&input).unwrap();
2246
2247        assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2248    }
2249
2250    #[test]
2251    fn encoder_emits_stmode_literals_for_long_literal_runs() {
2252        let input: Vec<_> = (0..128).map(|index| (index * 73 + 19) as u8).collect();
2253        let mut encoder = Unpack15Encoder::new();
2254        let packed = encoder.encode_member(&input).unwrap();
2255
2256        assert!(
2257            encoder.stmode_literal_count > 0,
2258            "long literal runs should use stmode literals before exiting stmode"
2259        );
2260        assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2261    }
2262
2263    #[test]
2264    fn encoder_options_can_disable_stmode_literal_runs() {
2265        let input: Vec<_> = (0..128).map(|index| (index * 73 + 19) as u8).collect();
2266        let mut encoder =
2267            Unpack15Encoder::with_options(EncodeOptions::new().with_stmode_literal_runs(false));
2268        let packed = encoder.encode_member(&input).unwrap();
2269
2270        assert_eq!(encoder.stmode_literal_count, 0);
2271        assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2272    }
2273
2274    #[test]
2275    fn encoder_options_bound_long_lz_search_distance() {
2276        let mut input: Vec<_> = (0u8..=255).cycle().take(300).collect();
2277        input.extend_from_within(..64);
2278
2279        assert_eq!(find_long_lz(&input, 300, 256), None);
2280        assert_eq!(
2281            find_long_lz(&input, 300, 0x8000),
2282            Some(LongLz {
2283                distance: 300,
2284                length: 64
2285            })
2286        );
2287    }
2288
2289    #[test]
2290    fn long_lz_search_rejects_unencodable_32k_distance() {
2291        let mut input = Vec::with_capacity(0x8000 + 64);
2292        let mut state = 0x1234_5678u32;
2293        for _ in 0..0x8000 + 64 {
2294            state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
2295            input.push((state >> 24) as u8);
2296        }
2297        let repeated = input[..64].to_vec();
2298        input[0x8000..0x8000 + 64].copy_from_slice(&repeated);
2299
2300        let token = find_long_lz(&input, 0x8000, 0x8000);
2301
2302        assert_ne!(token.map(|token| token.distance), Some(0x8000));
2303    }
2304
2305    #[test]
2306    fn lazy_match_prefers_longer_next_position_match() {
2307        let input = b"abcXbcQRSTabcQRSTUV";
2308        let buckets = long_lz_buckets(input);
2309        let token = find_lz_token(
2310            input,
2311            10,
2312            &buckets,
2313            LzPlanState {
2314                last_dist: u32::MAX,
2315                last_length: 0,
2316                old_dist: [u32::MAX; 4],
2317                old_dist_ptr: 0,
2318                max_dist3: 0x2001,
2319                nlzb: 0,
2320                nhfb: 0,
2321                l_count: 0,
2322            },
2323            EncodeOptions::default(),
2324        )
2325        .unwrap();
2326
2327        assert!(matches!(
2328            token,
2329            EncodedToken::ShortLz(super::ShortLz { length: 3, .. })
2330        ));
2331        assert!(should_lazy_emit_literal(
2332            input,
2333            10,
2334            &buckets,
2335            token,
2336            0x2001,
2337            EncodeOptions::default()
2338        ));
2339
2340        let packed = unpack15_encode(input).unwrap();
2341        assert_eq!(unpack15_decode(&packed, input.len()).unwrap(), input);
2342    }
2343
2344    #[test]
2345    fn cost_aware_selection_prefers_better_bits_per_byte_token() {
2346        let mut input = vec![b'Z'; 40];
2347        input[7] = b'A';
2348        input[8] = b'A';
2349        input[9] = b'A';
2350        input[10] = b'B';
2351        input[39] = b'A';
2352        let pos = input.len();
2353        input.extend_from_slice(b"AAAAAAAAAA");
2354
2355        let mut encoder = Unpack15Encoder::new();
2356        encoder.old_dist = [u32::MAX, u32::MAX, u32::MAX, 33];
2357        let buckets = long_lz_buckets(&input);
2358        let token = encoder
2359            .choose_lz_token(
2360                &input,
2361                pos,
2362                &buckets,
2363                LzPlanState {
2364                    last_dist: u32::MAX,
2365                    last_length: 0,
2366                    old_dist: encoder.old_dist,
2367                    old_dist_ptr: encoder.old_dist_ptr,
2368                    max_dist3: encoder.max_dist3,
2369                    nlzb: encoder.nlzb,
2370                    nhfb: encoder.nhfb,
2371                    l_count: encoder.l_count,
2372                },
2373                0,
2374            )
2375            .unwrap();
2376
2377        assert_eq!(
2378            token,
2379            EncodedToken::ShortLz(ShortLz {
2380                distance: 1,
2381                length: 10,
2382            })
2383        );
2384    }
2385
2386    #[test]
2387    fn planner_uses_simulated_max_dist3_for_old_distance_candidates() {
2388        let mut state = 0x1234_5678u32;
2389        let mut input = Vec::with_capacity(9004);
2390        for _ in 0..9004 {
2391            state ^= state << 13;
2392            state ^= state >> 17;
2393            state ^= state << 5;
2394            input.push(state as u8);
2395        }
2396        let pos = 9000;
2397        let prefix = [input[0], input[1], input[2]];
2398        input[pos..pos + 3].copy_from_slice(&prefix);
2399        input[pos + 3] = input[3].wrapping_add(1);
2400
2401        let mut encoder = Unpack15Encoder::new();
2402        encoder.max_dist3 = 0x7f00;
2403        let buckets = long_lz_buckets(&input);
2404        let token = encoder.choose_lz_token(
2405            &input,
2406            pos,
2407            &buckets,
2408            LzPlanState {
2409                last_dist: u32::MAX,
2410                last_length: 0,
2411                old_dist: [u32::MAX, u32::MAX, u32::MAX, 9000],
2412                old_dist_ptr: 0,
2413                max_dist3: 0x2001,
2414                nlzb: encoder.nlzb,
2415                nhfb: encoder.nhfb,
2416                l_count: encoder.l_count,
2417            },
2418            0,
2419        );
2420
2421        assert_eq!(token, None);
2422    }
2423
2424    #[test]
2425    fn encoder_round_trips_source_shaped_payload() {
2426        let source = include_bytes!("rar13.rs");
2427        let input = &source[..source.len().min(50_902)];
2428
2429        let packed = unpack15_encode(input).unwrap();
2430        let decoded = unpack15_decode(&packed, input.len()).unwrap();
2431
2432        let first_diff = decoded
2433            .iter()
2434            .zip(input)
2435            .position(|(actual, expected)| actual != expected);
2436        assert_eq!(first_diff, None, "first differing byte in decoded payload");
2437        assert_eq!(decoded, input);
2438    }
2439}
2440
2441#[derive(Clone)]
2442struct BitReader {
2443    input: Vec<u8>,
2444    bit_pos: usize,
2445    final_input: bool,
2446}
2447
2448impl BitReader {
2449    fn new(input: &[u8]) -> Self {
2450        Self {
2451            input: input.to_vec(),
2452            bit_pos: 0,
2453            final_input: false,
2454        }
2455    }
2456
2457    fn new_final(input: &[u8]) -> Self {
2458        Self {
2459            input: input.to_vec(),
2460            bit_pos: 0,
2461            final_input: true,
2462        }
2463    }
2464
2465    fn append(&mut self, input: &[u8]) {
2466        self.compact();
2467        self.input.extend_from_slice(input);
2468    }
2469
2470    fn finish(&mut self) {
2471        self.final_input = true;
2472    }
2473
2474    fn compact(&mut self) {
2475        let bytes = self.bit_pos / 8;
2476        if bytes == 0 {
2477            return;
2478        }
2479        self.input.drain(..bytes);
2480        self.bit_pos -= bytes * 8;
2481    }
2482
2483    fn get_bits(&self) -> Result<u32> {
2484        let mut value = 0u32;
2485        for i in 0..16 {
2486            value <<= 1;
2487            let bit_index = self.bit_pos + i;
2488            let byte = match self.input.get(bit_index / 8).copied() {
2489                Some(byte) => byte,
2490                None if self.final_input => 0,
2491                None => return Err(Error::NeedMoreInput),
2492            };
2493            value |= ((byte >> (7 - (bit_index % 8))) & 1) as u32;
2494        }
2495        Ok(value)
2496    }
2497
2498    fn add_bits(&mut self, count: usize) {
2499        self.bit_pos += count;
2500    }
2501}
2502
2503#[derive(Default)]
2504struct BitWriter {
2505    output: Vec<u8>,
2506    bit_pos: usize,
2507}
2508
2509impl BitWriter {
2510    fn new() -> Self {
2511        Self {
2512            output: Vec::new(),
2513            bit_pos: 0,
2514        }
2515    }
2516
2517    fn write_bits(&mut self, value: u32, count: usize) {
2518        for i in (0..count).rev() {
2519            let bit = ((value >> i) & 1) as u8;
2520            if self.bit_pos.is_multiple_of(8) {
2521                self.output.push(0);
2522            }
2523            if bit != 0 {
2524                let idx = self.output.len() - 1;
2525                self.output[idx] |= 1 << (7 - (self.bit_pos % 8));
2526            }
2527            self.bit_pos += 1;
2528        }
2529    }
2530
2531    fn finish(self) -> Vec<u8> {
2532        self.output
2533    }
2534}