Skip to main content

rar_stream/decompress/ppm/
model.rs

1//! PPMd model for RAR decompression.
2//!
3//! Based on Dmitry Shkarin's PPMd implementation.
4
5use super::super::BitReader;
6use super::range_coder::{RangeCoder, SubRange};
7use super::sub_alloc::SubAllocator;
8
// PPMd constants.

/// Bit width of [`INTERVAL`]; also one of the two summands of [`TOT_BITS`].
const INT_BITS: u32 = 7;
/// Averaging shift for adaptive estimates: the upper bound for
/// `See2Context::shift` and the shift applied when computing the mean of a
/// `bin_summ` entry in `decode_bin_symbol`.
const PERIOD_BITS: u32 = 7;
/// Bit width of [`BIN_SCALE`]; passed to the range coder's
/// `get_current_shift_count` when decoding a binary context.
const TOT_BITS: u32 = INT_BITS + PERIOD_BITS;
/// Amount added to a `bin_summ` entry (before its mean is subtracted) when a
/// binary context predicts its symbol.
const INTERVAL: u32 = 1 << INT_BITS;
/// Scale (total count) of the sub-range used for binary-context decoding.
const BIN_SCALE: u32 = 1 << TOT_BITS;
/// Symbol frequency above which `decode_symbol1` asks for a rescale.
const MAX_FREQ: u32 = 124;
/// NOTE(review): not referenced in the part of the file reviewed; presumably
/// the maximum supported model order — confirm before relying on it.
const MAX_O: usize = 64;
/// NOTE(review): not referenced in the part of the file reviewed; presumably
/// the initial escape estimate — confirm before relying on it.
const INIT_ESC: u32 = 4;
18
/// PPMd state (symbol + frequency + successor).
///
/// Value-type view of one entry of a context's statistics. Inside the
/// sub-allocator heap the decoder steps through such entries with a stride of
/// 6 bytes (`stats + i * 6`).
#[derive(Clone, Copy, Default)]
struct State {
    /// Byte value this state stands for.
    symbol: u8,
    /// Adaptive frequency count of `symbol`; starts at 1 in the root context.
    freq: u8,
    /// Offset in the sub-allocator that `decode_char` may adopt as the next
    /// context; the root context is initialised with 0 here.
    successor: u32, // Offset in sub-allocator
}
26
/// PPMd context.
///
/// Value-type description of a context record. The decoding code visible in
/// this file reads and writes the individual fields directly in the
/// sub-allocator heap through `read_context_*` / `write_context_*` helpers.
struct Context {
    /// Number of symbols seen in this context; 1 selects the binary decoding
    /// path in `decode_char`.
    num_stats: u16,
    /// Scale used by `decode_symbol1` (the root starts at 257 for 256 symbols).
    summ_freq: u16,
    stats: u32,    // Offset to states array
    suffix: u32,   // Offset to suffix context
    // For single-stat contexts, we use OneState inline
    // (`decode_bin_symbol` addresses it at context offset + 2).
    one_state: State,
}
36
/// SEE2 context for escape estimation.
///
/// Keeps a scaled running sum from which `get_mean` derives an escape
/// frequency, together with the adaptation state used by `update`.
#[derive(Clone, Copy)]
struct See2Context {
    /// Scaled running sum; `get_mean` returns `summ >> shift` and subtracts
    /// that value from the sum.
    summ: u16,
    /// Current scaling shift; `update` raises it up to `PERIOD_BITS`.
    shift: u8,
    /// Countdown of `update` calls remaining before the next rescale step.
    count: u8,
}
44
45impl See2Context {
46    fn new(init_val: u16) -> Self {
47        Self {
48            summ: init_val << (PERIOD_BITS as u8 - 4),
49            shift: PERIOD_BITS as u8 - 4,
50            count: 4,
51        }
52    }
53
54    fn get_mean(&mut self) -> u32 {
55        let ret = (self.summ >> self.shift) as i16;
56        self.summ = self.summ.wrapping_sub(ret as u16);
57        if ret == 0 { 1 } else { ret as u32 }
58    }
59
60    fn update(&mut self) {
61        if self.shift < PERIOD_BITS as u8 {
62            self.count = self.count.wrapping_sub(1);
63            if self.count == 0 {
64                self.summ = self.summ.wrapping_add(self.summ);
65                self.count = 3 << self.shift;
66                self.shift += 1;
67            }
68        }
69    }
70}
71
/// PPMd model.
///
/// Holds the adaptive state of the decoder: the sub-allocator heap containing
/// contexts and states, the offsets of the contexts currently in use, and the
/// lookup tables and estimators consulted while decoding. Context and state
/// "pointers" are stored as `u32` offsets into the sub-allocator; offset 0 is
/// used as the null value.
pub struct PpmModel {
    /// Sub-allocator for contexts.
    sub_alloc: SubAllocator,
    /// Minimum context: offset of the context the next symbol is decoded
    /// from. Zero means the model has not been initialised.
    min_context: u32,
    /// Medium context.
    /// NOTE(review): not touched in the part of the file reviewed.
    med_context: u32,
    /// Maximum context; set together with `min_context` when a successor
    /// context is adopted.
    max_context: u32,
    /// Found state: offset of the state decoded last, or 0 after an escape.
    found_state: u32,
    /// Number of masked symbols recorded by the most recent escape.
    num_masked: usize,
    /// Initial escape, taken from the `EXP_ESCAPE` table on a binary escape.
    init_esc: u32,
    /// Order fall: starts at `max_order` and is incremented for every suffix
    /// step taken after an escape.
    order_fall: i32,
    /// Maximum order.
    max_order: i32,
    /// Run length: incremented on successful predictions.
    run_length: i32,
    /// Initial run length: `-(min(max_order, 12)) - 1`.
    init_rl: i32,
    /// Character mask: a symbol is masked while its entry equals `esc_count`.
    char_mask: [u8; 256],
    /// NS2 index mapping (number of symbols -> SEE2 row).
    ns2_indx: [u8; 256],
    /// NS2 BS index mapping (suffix symbol count - 1 -> `bin_summ` column part).
    ns2_bs_indx: [u8; 256],
    /// HB2 flag: 0 for symbols below 0x40, 0x08 for all others.
    hb2_flag: [u8; 256],
    /// Escape count: the value written into `char_mask` to mark a symbol.
    esc_count: u8,
    /// Previous success: 1 when the last symbol was predicted strongly
    /// (see `decode_symbol1` / `decode_bin_symbol`), otherwise 0.
    prev_success: u8,
    /// High bits flag: `hb2_flag` of the previously decoded symbol.
    hi_bits_flag: u8,
    /// Binary SEE contexts, indexed by `[freq - 1][context index]`.
    bin_summ: [[u16; 64]; 128],
    /// SEE2 contexts.
    see2_cont: [[See2Context; 16]; 25],
    /// Dummy SEE2 context.
    dummy_see2: See2Context,
    /// Escape character; -1 until a stream header supplies one.
    esc_char: i32,
    /// Debug: decode count (number of `decode_char` calls, used only to
    /// select trace output in test builds).
    debug_count: u32,
}
122
123impl PpmModel {
124    /// Create a new PPM model.
125    pub fn new() -> Self {
126        Self {
127            sub_alloc: SubAllocator::new(1), // Start with 1MB
128            min_context: 0,
129            med_context: 0,
130            max_context: 0,
131            found_state: 0,
132            num_masked: 0,
133            init_esc: 0,
134            order_fall: 0,
135            max_order: 0,
136            run_length: 0,
137            init_rl: 0,
138            char_mask: [0; 256],
139            ns2_indx: [0; 256],
140            ns2_bs_indx: [0; 256],
141            hb2_flag: [0; 256],
142            esc_count: 0,
143            prev_success: 0,
144            hi_bits_flag: 0,
145            bin_summ: [[0; 64]; 128],
146            see2_cont: [[See2Context::new(0); 16]; 25],
147            dummy_see2: See2Context { summ: 0, shift: PERIOD_BITS as u8, count: 0 },
148            esc_char: -1,
149            
150            debug_count: 0,
151        }
152    }
153
154    /// Initialize the model from a byte stream. Returns (RangeCoder, esc_char).
155    pub fn init(&mut self, reader: &mut BitReader) -> Result<(RangeCoder, i32), &'static str> {
156        let max_order_byte = reader.read_byte().ok_or("EOF reading max order")?;
157        let reset = (max_order_byte & 0x20) != 0;
158
159        #[cfg(test)]
160        eprintln!("[PPM init] max_order_byte=0x{:02x} reset={}", max_order_byte, reset);
161
162        // If reset flag is set, or if we haven't initialized yet, we need to initialize
163        let need_init = reset || self.min_context == 0;
164
165        let max_mb = if reset {
166            reader.read_byte().ok_or("EOF reading max MB")? as usize
167        } else {
168            1 // Default
169        };
170
171        
172
173        if (max_order_byte & 0x40) != 0 {
174            self.esc_char = reader.read_byte().ok_or("EOF reading esc char")? as i32;
175            
176        }
177
178        // Initialize range coder
179        let coder = RangeCoder::new(reader);
180
181        if need_init {
182            let mut max_order = (max_order_byte & 0x1f) as i32 + 1;
183            if max_order > 16 {
184                max_order = 16 + (max_order - 16) * 3;
185            }
186            
187            #[cfg(test)]
188            eprintln!("[PPM init] max_order={} max_mb={}", max_order, max_mb);
189            
190            if max_order == 1 {
191                return Err("Invalid max order");
192            }
193            
194            // Reinitialize sub-allocator
195            self.sub_alloc = SubAllocator::new(max_mb + 1);
196            self.start_model(max_order);
197        }
198
199        
200
201        if self.min_context == 0 {
202            return Err("Model initialization failed");
203        }
204
205        Ok((coder, self.esc_char))
206    }
207
208    /// Start/restart the model.
209    fn start_model(&mut self, max_order: i32) {
210        self.max_order = max_order;
211        self.esc_count = 1;
212        self.restart_model();
213        
214        // Initialize NS2 index tables
215        self.ns2_bs_indx[0] = 0;
216        self.ns2_bs_indx[1] = 2;
217        for i in 2..11 {
218            self.ns2_bs_indx[i] = 4;
219        }
220        for i in 11..256 {
221            self.ns2_bs_indx[i] = 6;
222        }
223
224        for i in 0..3 {
225            self.ns2_indx[i] = i as u8;
226        }
227        let mut m = 3u8;
228        let mut k = 1usize;
229        let mut step = 1usize;
230        for i in 3..256 {
231            self.ns2_indx[i] = m;
232            k = k.saturating_sub(1);
233            if k == 0 {
234                step += 1;
235                k = step;
236                m += 1;
237            }
238        }
239
240        for i in 0..0x40 {
241            self.hb2_flag[i] = 0;
242        }
243        for i in 0x40..0x100 {
244            self.hb2_flag[i] = 0x08;
245        }
246
247        self.dummy_see2.shift = PERIOD_BITS as u8;
248    }
249
250    /// Restart the model (clear and reinitialize).
251    fn restart_model(&mut self) {
252        self.char_mask = [0; 256];
253        self.sub_alloc.init();
254
255        self.init_rl = -(if self.max_order < 12 { self.max_order } else { 12 }) - 1;
256        
257        // Allocate root context
258        let ctx = self.sub_alloc.alloc_context().unwrap_or(0);
259        self.min_context = ctx as u32;
260        self.max_context = ctx as u32;
261        
262        
263        
264        if ctx == 0 {
265            return;
266        }
267
268        // Initialize root context with 256 symbols
269        self.write_context_num_stats(ctx, 256);
270        self.write_context_summ_freq(ctx, 257);
271        
272        let stats = self.sub_alloc.alloc_units(128).unwrap_or(0);
273        self.write_context_stats(ctx, stats as u32);
274        
275        
276        
277        // Verify it was written correctly
278        
279        {
280            let _read_back = self.read_context_stats(ctx);
281        }
282        self.write_context_stats(ctx, stats as u32);
283        
284        self.order_fall = self.max_order;
285        self.found_state = stats as u32;
286        
287        // Initialize all 256 symbols with freq=1
288        for i in 0..256 {
289            self.write_state(stats + i * 6, i as u8, 1, 0);
290        }
291        
292        // Verify initialization
293        #[cfg(test)]
294        {
295            for i in 0..256 {
296                let sym = self.read_state_symbol(stats + i * 6);
297                if sym != i as u8 {
298                    eprintln!("[INIT] ERROR: stats[{}] has sym {} instead of {}", i, sym, i);
299                }
300            }
301        }
302
303        self.run_length = self.init_rl;
304        self.prev_success = 0;
305
306        // Initialize binary SEE contexts
307        let init_bin_esc: [u16; 8] = [
308            0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051
309        ];
310        
311        for i in 0..128 {
312            for k in 0..8 {
313                for m in (0..64).step_by(8) {
314                    self.bin_summ[i][k + m] = 
315                        (BIN_SCALE as u16).wrapping_sub(init_bin_esc[k] / (i as u16 + 2));
316                }
317            }
318        }
319
320        // Initialize SEE2 contexts
321        for i in 0..25 {
322            for k in 0..16 {
323                self.see2_cont[i][k] = See2Context::new((5 * i + 10) as u16);
324            }
325        }
326    }
327
328    /// Decode a character.
329    pub fn decode_char(&mut self, coder: &mut RangeCoder, reader: &mut BitReader) -> Result<i32, &'static str> {
330        // Track position for debugging
331        self.debug_count += 1;
332        
333        #[cfg(test)]
334        let start_bytes = reader.byte_position();
335        
336        #[cfg(test)]
337        if self.debug_count == 0 {
338            let (code, low, range) = coder.debug_state();
339            eprintln!("[ENTRY pos={}] low={} range={} code={} code-low={} prev_success={}", 
340                     self.debug_count, low, range, code, code.wrapping_sub(low), self.prev_success);
341        }
342        
343        // Check context validity
344        let text_ptr = self.sub_alloc.text_ptr();
345        let heap_end = self.sub_alloc.heap_end();
346        
347        if self.min_context as usize <= text_ptr || self.min_context as usize > heap_end {
348            return Err("Invalid context");
349        }
350
351        let num_stats = self.read_context_num_stats(self.min_context as usize);
352        
353        #[cfg(test)]
354        if self.debug_count == 0 {
355            let (code, low, range) = coder.debug_state();
356            eprintln!("[ENTRY pos={}] low={} range={} code={} NumStats={} ctx={}", 
357                     self.debug_count, low, range, code, num_stats, self.min_context);
358        }
359        
360        #[cfg(test)]
361        if self.debug_count == 0 {
362            let summ = self.read_context_summ_freq(self.min_context as usize);
363            eprintln!("[pos={}] min_context={} NumStats={} SummFreq={} order_fall={}", 
364                     self.debug_count, self.min_context, num_stats, summ, self.order_fall);
365        }
366        
367        if num_stats != 1 {
368            // Multi-symbol context
369            let stats = self.read_context_stats(self.min_context as usize);
370            if stats as usize <= text_ptr || stats as usize > heap_end {
371                #[cfg(test)]
372                eprintln!("[pos={}] INVALID STATS: ctx={} num_stats={} stats={} text_ptr={} heap_end={}", 
373                         self.debug_count, self.min_context, num_stats, stats, text_ptr, heap_end);
374                return Err("Invalid stats pointer");
375            }
376            #[cfg(test)]
377            if self.debug_count == 0 {
378                eprintln!("[pos={}] Multi-symbol context at {}, num_stats={}", self.debug_count, self.min_context, num_stats);
379            }
380            self.decode_symbol1(coder, reader)?;
381        } else {
382            // Binary context
383            #[cfg(test)]
384            if self.debug_count == 0 {
385                // OneState symbol is at context+2
386                let sym = self.sub_alloc.read_byte(self.min_context as usize + 2);
387                let suffix = self.read_context_suffix(self.min_context as usize);
388                eprintln!("[pos={}] Binary context at {}, sym='{}' ({}), suffix={}, order_fall={}, max_order={}", 
389                         self.debug_count, self.min_context, sym as char, sym, suffix, self.order_fall, self.max_order);
390            }
391            self.decode_bin_symbol(coder, reader)?;
392        }
393
394        // Normalize is called in the escape loop or at the end of decode_char
395        // Not here after a successful decode
396        
397        while self.found_state == 0 {
398            coder.normalize(reader);
399            #[cfg(test)]
400            if self.debug_count == 0 {
401                let (code, low, range) = coder.debug_state();
402                eprintln!("[ESCAPE pos={}] After normalize: low={} range={} code={}", self.debug_count, low, range, code);
403            }
404            
405            // Walk up suffix chain
406            loop {
407                self.order_fall += 1;
408                let suffix = self.read_context_suffix(self.min_context as usize);
409                
410                #[cfg(test)]
411                if self.debug_count == 0 {
412                    eprintln!("[ESCAPE pos={}] order_fall={} min_context={} suffix={}", 
413                             self.debug_count, self.order_fall, self.min_context, suffix);
414                }
415                
416                if suffix as usize <= text_ptr || suffix as usize > heap_end {
417                    #[cfg(test)]
418                    eprintln!("[ESCAPE pos={}] Invalid suffix={} (text_ptr={} heap_end={})", 
419                             self.debug_count, suffix, text_ptr, heap_end);
420                    return Err("Invalid suffix");
421                }
422                
423                self.min_context = suffix;
424                
425                let ns = self.read_context_num_stats(suffix as usize);
426                if ns as usize != self.num_masked {
427                    #[cfg(test)]
428                    if self.debug_count == 0 {
429                        eprintln!("[ESCAPE pos={}] Found context with ns={} (masked={})", self.debug_count, ns, self.num_masked);
430                    }
431                    break;
432                }
433            }
434            
435            self.decode_symbol2(coder, reader)?;
436            // No normalize here - unrar doesn't normalize after decodeSymbol2 inside the while loop
437        }
438
439        // Get the decoded symbol
440        let symbol = self.read_state_symbol(self.found_state as usize);
441        
442        // Update model
443        let successor = self.read_state_successor(self.found_state as usize);
444        if self.order_fall == 0 && successor as usize > text_ptr {
445            let succ = successor;
446            self.min_context = succ;
447            self.max_context = succ;
448        } else {
449            self.update_model();
450            if self.esc_count == 0 {
451                self.clear_mask();
452            }
453        }
454
455        coder.normalize(reader);
456        
457        #[cfg(test)]
458        {
459            let end_bytes = reader.byte_position();
460            let bytes_consumed = end_bytes - start_bytes;
461            if self.debug_count == 0 || (self.debug_count >= 1120 && self.debug_count <= 1135) {
462                eprintln!("[pos={}] sym='{}' ({}) bytes_consumed={} found_state={}", 
463                         self.debug_count, symbol as char, symbol, bytes_consumed, self.found_state);
464            }
465        }
466        
467        Ok(symbol as i32)
468    }
469
470    /// Decode from a multi-symbol context.
471    fn decode_symbol1(&mut self, coder: &mut RangeCoder, _reader: &mut BitReader) -> Result<(), &'static str> {
472        let summ_freq = self.read_context_summ_freq(self.min_context as usize);
473        let stats = self.read_context_stats(self.min_context as usize);
474        let num_stats = self.read_context_num_stats(self.min_context as usize);
475        
476        #[cfg(test)]
477        if self.debug_count == 0 {
478            eprintln!("[DS1 pos={}] summ_freq={} num_stats={} prev_success_before={}", 
479                     self.debug_count, summ_freq, num_stats, self.prev_success);
480        }
481        
482        let count = coder.get_current_count(summ_freq as u32);
483        
484        #[cfg(test)]
485        if self.debug_count == 0 {
486            eprintln!("[DS1 pos={}] count={}", self.debug_count, count);
487        }
488        
489        // Check for out-of-range count
490        if count >= summ_freq as u32 {
491            return Err("Count exceeds scale");
492        }
493        
494        let mut hi_cnt = 0u32;
495        
496        for i in 0..num_stats {
497            let state_ptr = stats as usize + (i as usize) * 6;
498            let freq = self.read_state_freq(state_ptr) as u32;
499            let _sym = self.read_state_symbol(state_ptr);
500            hi_cnt += freq;
501            
502            #[cfg(test)]
503            if self.debug_count == 0 {
504                eprintln!("[DS1 pos={}] i={} sym='{}' ({}) freq={} hi_cnt={}", self.debug_count, i, sym as char, sym, freq, hi_cnt);
505            }
506            
507            if hi_cnt > count {
508                let lo_cnt = hi_cnt - freq;
509                
510                #[cfg(test)]
511                if self.debug_count == 0 {
512                    eprintln!("[DS1 pos={}] Selected i={} sym='{}' ({}) lo={} hi={}", 
513                             self.debug_count, i, sym as char, sym, lo_cnt, hi_cnt);
514                    let (code, low, range) = coder.debug_state();
515                    eprintln!("[DS1 pos={}] BEFORE decode: low={} range={} code={}", 
516                             self.debug_count, low, range, code);
517                }
518                let sub = SubRange {
519                    low_count: lo_cnt,
520                    high_count: hi_cnt,
521                    scale: summ_freq as u32,
522                };
523                coder.decode(&sub);
524                
525                #[cfg(test)]
526                if self.debug_count == 0 {
527                    let (code, low, range) = coder.debug_state();
528                    eprintln!("[DS1 pos={}] AFTER decode: low={:#x} range={:#x} code={:#x}", 
529                             self.debug_count, low, range, code);
530                }
531                
532                // Calculate prev_success BEFORE updating frequencies (match unrar)
533                // IMPORTANT: PrevSuccess is only calculated for FIRST symbol (i==0)
534                // For other symbols, PrevSuccess = 0
535                if i == 0 {
536                    self.prev_success = if 2 * freq > summ_freq as u32 { 1 } else { 0 };
537                    self.run_length += self.prev_success as i32;
538                } else {
539                    self.prev_success = 0;
540                }
541                
542                // Update frequency and check for rescale
543                let hi_cnt = freq + 4;
544                self.write_state_freq(state_ptr, hi_cnt as u8);
545                
546                // Update summ_freq
547                let new_summ = summ_freq.saturating_add(4);
548                self.write_context_summ_freq(self.min_context as usize, new_summ);
549                
550                // Swap with previous state if this one has higher frequency (move-to-front)
551                // This matches unrar's update1() behavior
552                if i > 0 {
553                    let prev_ptr = stats as usize + ((i - 1) as usize) * 6;
554                    let prev_freq = self.read_state_freq(prev_ptr);
555                    if hi_cnt as u8 > prev_freq {
556                        // Swap the two states (6 bytes each)
557                        let cur_sym = self.read_state_symbol(state_ptr);
558                        let cur_succ = self.read_state_successor(state_ptr);
559                        let prev_sym = self.read_state_symbol(prev_ptr);
560                        let prev_succ = self.read_state_successor(prev_ptr);
561                        
562                        self.write_state(prev_ptr, cur_sym, hi_cnt as u8, cur_succ);
563                        self.write_state(state_ptr, prev_sym, prev_freq, prev_succ);
564                        
565                        self.found_state = prev_ptr as u32;
566                        
567                        // Check if rescale needed
568                        if hi_cnt > MAX_FREQ {
569                            self.rescale();
570                        }
571                    } else {
572                        self.found_state = state_ptr as u32;
573                        if hi_cnt > MAX_FREQ {
574                            self.rescale();
575                        }
576                    }
577                } else {
578                    self.found_state = state_ptr as u32;
579                    if hi_cnt > MAX_FREQ {
580                        self.rescale();
581                    }
582                }
583                
584                return Ok(());
585            }
586        }
587        
588        // Escape
589        #[cfg(test)]
590        if self.debug_count == 0 {
591            eprintln!("[DS1 pos={} ESCAPE] hi_cnt={} summ_freq={} before decode", self.debug_count, hi_cnt, summ_freq);
592        }
593        
594        // Set PrevSuccess = 0 on escape (matching unrar line 467)
595        self.prev_success = 0;
596        
597        // Set HiBitsFlag based on previous FoundState's symbol (matching unrar's line 448)
598        if self.found_state != 0 {
599            let prev_sym = self.read_state_symbol(self.found_state as usize);
600            self.hi_bits_flag = self.hb2_flag[prev_sym as usize];
601        }
602        
603        let sub = SubRange {
604            low_count: hi_cnt,
605            high_count: summ_freq as u32,
606            scale: summ_freq as u32,
607        };
608        coder.decode(&sub);
609        
610        #[cfg(test)]
611        if self.debug_count == 0 {
612            let (code, low, range) = coder.debug_state();
613            eprintln!("[DS1 pos=98 ESCAPE] After decode: low={} range={} code={}", low, range, code);
614        }
615        
616        self.num_masked = num_stats as usize;
617        self.found_state = 0;
618        
619        // Set masks - mark all symbols in this context as masked
620        // NOTE: Do NOT increment esc_count here - that happens in update2() after decodeSymbol2 finds a symbol
621        for i in 0..num_stats {
622            let state_ptr = stats as usize + (i as usize) * 6;
623            let sym = self.read_state_symbol(state_ptr);
624            self.char_mask[sym as usize] = self.esc_count;
625        }
626        
627        Ok(())
628    }
629
630    /// Decode from a binary context.
631    fn decode_bin_symbol(&mut self, coder: &mut RangeCoder, _reader: &mut BitReader) -> Result<(), &'static str> {
632        let state = self.read_context_one_state(self.min_context as usize);
633        
634        // Update HiBitsFlag based on previous FoundState's symbol (set at start of decode)
635        if self.found_state != 0 {
636            let prev_sym = self.read_state_symbol(self.found_state as usize);
637            self.hi_bits_flag = self.hb2_flag[prev_sym as usize];
638        }
639        
640        // Get binary probability - match unrar's index calculation exactly
641        let suffix = self.read_context_suffix(self.min_context as usize);
642        let suffix_num_stats = if suffix != 0 {
643            self.read_context_num_stats(suffix as usize)
644        } else {
645            1 // Default if no suffix
646        };
647        
648        // Use NS2BSIndx (not ns2_indx) with NumStats-1
649        let ns_idx = if suffix_num_stats > 0 { suffix_num_stats - 1 } else { 0 };
650        let ns1 = self.ns2_bs_indx[ns_idx as usize] as usize;
651        
652        // Index calculation matching unrar:
653        // PrevSuccess + NS2BSIndx[Suffix->NumStats-1] + HiBitsFlag + 2*HB2Flag[rs.Symbol] + ((RunLength >> 26) & 0x20)
654        let idx1 = (self.prev_success as usize) + 
655            ns1 + 
656            self.hi_bits_flag as usize +
657            2 * (self.hb2_flag[state.symbol as usize] as usize) +
658            ((self.run_length >> 26) & 0x20) as usize;
659        
660        // BinSumm first index is Freq-1 (not freq>>2)
661        let freq_idx = if state.freq > 0 { (state.freq - 1) as usize } else { 0 };
662        let freq_idx = freq_idx.min(127); // BinSumm is [128][64]
663        let idx1 = idx1.min(63);
664        
665        let bs = self.bin_summ[freq_idx][idx1];
666        
667        #[cfg(test)]
668        if self.debug_count == 0 {
669            eprintln!("[BIN pos={} idx] prev_success={} ns1={} hi_bits_flag={} hb2_flag[{}]={} run_length={}", 
670                     self.debug_count, self.prev_success, ns1, self.hi_bits_flag, state.symbol, self.hb2_flag[state.symbol as usize], self.run_length);
671            eprintln!("[BIN pos={} idx] idx1={} freq_idx={} bs={}", self.debug_count, idx1, freq_idx, bs);
672        }
673        
674        let count = coder.get_current_shift_count(TOT_BITS);
675        
676        #[cfg(test)]
677        if self.debug_count == 0 {
678            eprintln!("[BIN pos={}] sym='{}' ({}) freq={} bs={} count={}", 
679                     self.debug_count, state.symbol as char, state.symbol, state.freq, bs, count);
680            let (code, low, range) = coder.debug_state();
681            eprintln!("[BIN pos={}] after get_count: low={} range={} code={}", self.debug_count, low, range, code);
682        }
683        
684        if count < bs as u32 {
685            // Symbol found
686            let sub = SubRange {
687                low_count: 0,
688                high_count: bs as u32,
689                scale: BIN_SCALE,
690            };
691            
692            #[cfg(test)]
693            if self.debug_count == 0 {
694                let (code, low, range) = coder.debug_state();
695                eprintln!("[BIN pos={}] FOUND: lo=0 hi={} scale={} | before decode: low={} range={} code={}", 
696                         self.debug_count, bs, BIN_SCALE, low, range, code);
697            }
698            
699            coder.decode(&sub);
700            
701            // Update frequency
702            let new_freq = state.freq + (if state.freq < 128 { 1 } else { 0 });
703            self.write_context_one_state_freq(self.min_context as usize, new_freq);
704            
705            // Update bin_summ: bs + INTERVAL - GET_MEAN(bs, PERIOD_BITS, 2)
706            // GET_MEAN(SUMM,SHIFT,ROUND) = ((SUMM+(1 << (SHIFT-ROUND))) >> SHIFT)
707            let mean = ((bs as u32 + (1 << (PERIOD_BITS - 2))) >> PERIOD_BITS) as u16;
708            let new_bs = bs.saturating_add((INTERVAL as u16).saturating_sub(mean));
709            self.bin_summ[freq_idx][idx1] = new_bs;
710            
711            self.found_state = self.min_context + 2; // OneState offset (in union at offset 2)
712            self.prev_success = 1;
713            self.run_length += 1;
714        } else {
715            // Escape
716            let sub = SubRange {
717                low_count: bs as u32,
718                high_count: BIN_SCALE,
719                scale: BIN_SCALE,
720            };
721            
722            #[cfg(test)]
723            if self.debug_count == 0 {
724                let (code, low, range) = coder.debug_state();
725                eprintln!("[BIN pos={}] ESCAPE: lo={} hi={} scale={} | before decode: low={} range={} code={}", 
726                         self.debug_count, bs, BIN_SCALE, BIN_SCALE, low, range, code);
727            }
728            
729            coder.decode(&sub);
730            
731            // Update bin_summ: bs - GET_MEAN(bs, PERIOD_BITS, 2)
732            let mean = ((bs as u32 + (1 << (PERIOD_BITS - 2))) >> PERIOD_BITS) as u16;
733            let new_bs = bs.saturating_sub(mean);
734            self.bin_summ[freq_idx][idx1] = new_bs;
735            
736            // InitEsc = ExpEscape[bs >> 10]
737            static EXP_ESCAPE: [u8; 16] = [25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2];
738            self.init_esc = EXP_ESCAPE[(new_bs >> 10) as usize] as u32;
739            
740            self.num_masked = 1;
741            self.found_state = 0;
742            self.char_mask[state.symbol as usize] = self.esc_count;
743            // Don't increment esc_count here - it's done in update2 after successful decode
744            self.prev_success = 0;
745        }
746        
747        Ok(())
748    }
749
750    /// Decode from a masked context.
751    fn decode_symbol2(&mut self, coder: &mut RangeCoder, _reader: &mut BitReader) -> Result<(), &'static str> {
752        #[cfg(test)]
753        if self.debug_count == 0 {
754            let (code, low, range) = coder.debug_state();
755            eprintln!("[DS2 pos={} entry] coder: low={:010} range={:010} code={:010}", 
756                     self.debug_count, low, range, code);
757        }
758        
759        let num_stats = self.read_context_num_stats(self.min_context as usize);
760        let stats = self.read_context_stats(self.min_context as usize);
761        
762        #[cfg(test)]
763        if self.debug_count == 0 || self.debug_count == 58 {
764            eprintln!("[DS2 pos={}] min_context={} num_stats={} num_masked={}", 
765                     self.debug_count, self.min_context, num_stats, self.num_masked);
766        }
767        
768        // Calculate i = NumStats - NumMasked (number of unmasked symbols)
769        let i = num_stats as usize - self.num_masked;
770        if i == 0 {
771            return Err("All symbols masked");
772        }
773        
774        // Calculate escape frequency using SEE2 or simplified for root
775        let esc_freq: u32;
776        let see2_row: usize;
777        let see2_col: usize;
778        let is_root = num_stats == 256;
779        
780        if !is_root {
781            // Use SEE2 - simplified version using NS2Indx
782            let ns2_idx = self.ns2_indx[(i - 1).min(255)] as usize;
783            let suffix = self.read_context_suffix(self.min_context as usize);
784            let suffix_num_stats = if suffix != 0 { 
785                self.read_context_num_stats(suffix as usize) 
786            } else { 
787                num_stats 
788            };
789            let summ_freq = self.read_context_summ_freq(self.min_context as usize);
790            
791            // Index into SEE2Cont
792            let diff_suffix = (i < (suffix_num_stats as usize - num_stats as usize)) as usize;
793            let freq_check = (summ_freq < 11 * num_stats) as usize;
794            let masked_check = (self.num_masked > i) as usize;
795            let see2_idx = ns2_idx + diff_suffix + 2 * freq_check + 4 * masked_check + self.hi_bits_flag as usize;
796            let see2_idx = see2_idx.min(24 * 16 - 1);
797            
798            see2_row = ns2_idx.min(24);
799            see2_col = see2_idx % 16;
800            
801            // Use get_mean() which decrements Summ (matching unrar's getMean behavior)
802            esc_freq = self.see2_cont[see2_row][see2_col].get_mean();
803            
804            #[cfg(test)]
805            if self.debug_count == 58 || self.debug_count == 0 {
806                let suffix = self.read_context_suffix(self.min_context as usize);
807                let suffix_ns = if suffix != 0 { self.read_context_num_stats(suffix as usize) } else { 0 };
808                eprintln!("[DS2 pos={}] SEE2 inputs: i={} suffix_ns={} summ_freq={} num_masked={} hi_bits_flag={}",
809                         self.debug_count, i, suffix_ns, summ_freq, self.num_masked, self.hi_bits_flag);
810                eprintln!("[DS2 pos={}] SEE2 indices: ns2_idx={} diff_suffix={} freq_check={} masked_check={}", 
811                         self.debug_count, ns2_idx, diff_suffix, freq_check, masked_check);
812                eprintln!("[DS2 pos={}] SEE2: see2_idx={} esc_freq={}",
813                         self.debug_count, see2_idx, esc_freq);
814            }
815        } else {
816            // Root context uses scale=1 (escape freq = 1)
817            esc_freq = 1;
818            see2_row = 0;
819            see2_col = 0;
820        }
821        
822        // Collect unmasked symbols and their frequencies
823        let mut hi_cnt = 0u32;
824        let mut unmasked: [(usize, u32); 256] = [(0, 0); 256];
825        let mut unmasked_idx = 0;
826        
827        #[cfg(test)]
828        let mut freq_histogram = [0u32; 256];
829        
830        for j in 0..num_stats {
831            let state_ptr = stats as usize + (j as usize) * 6;
832            let sym = self.read_state_symbol(state_ptr);
833            if self.char_mask[sym as usize] != self.esc_count {
834                let freq = self.read_state_freq(state_ptr) as u32;
835                hi_cnt += freq;
836                unmasked[unmasked_idx] = (state_ptr, freq);
837                unmasked_idx += 1;
838                
839                #[cfg(test)]
840                if self.debug_count == 13 && freq > 1 {
841                    freq_histogram[sym as usize] = freq;
842                }
843            }
844        }
845        
846        #[cfg(test)]
847        if self.debug_count == 15 {
848            eprintln!("[DS2 pos=15] stats pointer={}, root context={}", stats, self.min_context);
849            
850            // Find symbol 'l' (108) and 'p' (112)
851            let mut cum = 0u32;
852            for j in 0..num_stats {
853                let state_ptr = stats as usize + (j as usize) * 6;
854                let sym = self.read_state_symbol(state_ptr);
855                if self.char_mask[sym as usize] != self.esc_count {
856                    let freq = self.read_state_freq(state_ptr) as u32;
857                    let prev_cum = cum;
858                    cum += freq;
859                    if sym == 108 { // 'l'
860                        eprintln!("[DS2 pos=15] 'l' at j={} prev_cum={} cum={} freq={}", j, prev_cum, cum, freq);
861                    }
862                    if sym == 112 { // 'p'
863                        eprintln!("[DS2 pos=15] 'p' at j={} prev_cum={} cum={} freq={}", j, prev_cum, cum, freq);
864                    }
865                    // Trace symbols with freq > 1
866                    if freq > 1 {
867                        eprintln!("[DS2 pos=15] sym='{}' ({}) j={} prev_cum={} cum={} freq={} state_ptr={}", 
868                                 sym as char, sym, j, prev_cum, cum, freq, state_ptr);
869                    }
870                }
871            }
872        }
873        
874        #[cfg(test)]
875        if self.debug_count == 13 {
876            // Find symbol 'd' (100)
877            let mut cum = 0u32;
878            for j in 0..num_stats {
879                let state_ptr = stats as usize + (j as usize) * 6;
880                let sym = self.read_state_symbol(state_ptr);
881                if self.char_mask[sym as usize] != self.esc_count {
882                    let freq = self.read_state_freq(state_ptr) as u32;
883                    let prev_cum = cum;
884                    cum += freq;
885                    if sym == 100 { // 'd'
886                        eprintln!("[DS2 pos=13] 'd' at j={} prev_cum={} cum={} freq={}", j, prev_cum, cum, freq);
887                    }
888                    // Also trace which symbol is at cumulative 238-239
889                    if prev_cum <= 238 && cum > 238 {
890                        eprintln!("[DS2 pos=13] At cum=238: j={} sym='{}' ({}) prev_cum={} cum={}", 
891                                 j, sym as char, sym, prev_cum, cum);
892                    }
893                }
894            }
895            eprintln!("[DS2 pos=13] To select 'd', need count in [prev_cum, cum)");
896        }
897        
898        // Total scale = escape freq + sum of unmasked frequencies  
899        let scale = esc_freq + hi_cnt;
900        
901        let count = coder.get_current_count(scale);
902        
903        #[cfg(test)]
904        if self.debug_count == 0 {
905            eprintln!("[DS2 pos={}] esc_freq={} hi_cnt={} scale={} count={} unmasked_idx={}", 
906                     self.debug_count, esc_freq, hi_cnt, scale, count, unmasked_idx);
907        }
908        
909        // Find symbol or escape
910        if count < hi_cnt {
911            // Symbol found
912            let mut cum = 0u32;
913            for k in 0..unmasked_idx {
914                let (state_ptr, freq) = unmasked[k];
915                cum += freq;
916                if cum > count {
917                    let lo_cnt = cum - freq;
918                    let _sym = self.read_state_symbol(state_ptr);
919                    
920                    #[cfg(test)]
921                    if self.debug_count == 0 || self.debug_count == 58 {
922                        eprintln!("[DS2 pos={}] Selected k={} sym='{}' ({}) at cum={} lo={} freq={}", 
923                                 self.debug_count, k, sym as char, sym, cum, lo_cnt, freq);
924                    }
925                    
926                    #[cfg(test)]
927                    if self.debug_count == 13 || self.debug_count == 58 {
928                        let (code, low, range) = coder.debug_state();
929                        eprintln!("[DS2 pos={}] FOUND lo={} hi={} scale={}", self.debug_count, lo_cnt, cum, scale);
930                        eprintln!("[DS2 pos={}] Before decode: low={} range={} code={}", self.debug_count, low, range, code);
931                    }
932                    
933                    let sub = SubRange {
934                        low_count: lo_cnt,
935                        high_count: cum,
936                        scale,
937                    };
938                    coder.decode(&sub);
939                    
940                    #[cfg(test)]
941                    if self.debug_count == 13 || self.debug_count == 58 {
942                        let (code, low, range) = coder.debug_state();
943                        eprintln!("[DS2 pos={}] After decode: low={} range={} code={}", self.debug_count, low, range, code);
944                    }
945                    
946                    // Update SEE2 context (matching unrar's psee2c->update())
947                    if !is_root {
948                        self.see2_cont[see2_row][see2_col].update();
949                    }
950                    
951                    self.found_state = state_ptr as u32;
952                    
953                    // Update frequency and check for rescale (update2)
954                    let new_freq = freq + 4;
955                    self.write_state_freq(state_ptr, new_freq as u8);
956                    
957                    let summ = self.read_context_summ_freq(self.min_context as usize);
958                    self.write_context_summ_freq(self.min_context as usize, summ + 4);
959                    
960                    // Check if rescale needed
961                    if new_freq > MAX_FREQ {
962                        self.rescale();
963                    }
964                    
965                    self.esc_count = self.esc_count.wrapping_add(1);
966                    self.run_length = self.init_rl;
967                    
968                    return Ok(());
969                }
970            }
971        }
972        
973        // Escape - add scale to SEE2 Summ (matching unrar's psee2c->Summ += scale)
974        if !is_root {
975            self.see2_cont[see2_row][see2_col].summ = 
976                self.see2_cont[see2_row][see2_col].summ.wrapping_add(scale as u16);
977        }
978        
979        let sub = SubRange {
980            low_count: hi_cnt,
981            high_count: scale,
982            scale,
983        };
984        coder.decode(&sub);
985        
986        // Mask remaining symbols
987        for k in 0..unmasked_idx {
988            let (state_ptr, _) = unmasked[k];
989            let sym = self.read_state_symbol(state_ptr);
990            self.char_mask[sym as usize] = self.esc_count;
991        }
992        self.num_masked = num_stats as usize;
993        
994        Ok(())
995    }
996
997    /// Update the model after decoding.
998    /// Create a child context.
999    /// Returns the new context pointer, or 0 on failure.
1000    fn create_child(&mut self, parent_ctx: u32, p_stats: usize, first_state_symbol: u8, first_state_freq: u8, first_state_successor: u32) -> u32 {
1001        let pc = match self.sub_alloc.alloc_context() {
1002            Some(ctx) => ctx as u32,
1003            None => return 0,
1004        };
1005        
1006        // NumStats = 1 (binary context)
1007        self.write_context_num_stats(pc as usize, 1);
1008        
1009        // OneState = FirstState (stored inline at offset 2, in the union with SummFreq+Stats)
1010        // Layout: Symbol(1) + Freq(1) + Successor(4) = 6 bytes at offset 2-7
1011        self.write_state(pc as usize + 2, first_state_symbol, first_state_freq, first_state_successor);
1012        
1013        // Suffix = parent context
1014        self.write_context_suffix(pc as usize, parent_ctx);
1015        
1016        // Update pStats->Successor to point to new context
1017        self.write_state_successor(p_stats, pc);
1018        
1019        pc
1020    }
1021
1022    /// Rescale frequencies in the current context when they exceed MAX_FREQ.
1023    /// This halves all frequencies while maintaining sorted order.
1024    fn rescale(&mut self) {
1025        let ctx = self.min_context as usize;
1026        let old_ns = self.read_context_num_stats(ctx);
1027        let stats = self.read_context_stats(ctx);
1028        
1029        // Move FoundState to front (swap chain)
1030        let mut p = self.found_state as usize;
1031        while p != stats as usize {
1032            // Swap p with p-6 (previous state)
1033            let prev_p = p - 6;
1034            let p_sym = self.read_state_symbol(p);
1035            let p_freq = self.read_state_freq(p);
1036            let p_succ = self.read_state_successor(p);
1037            let prev_sym = self.read_state_symbol(prev_p);
1038            let prev_freq = self.read_state_freq(prev_p);
1039            let prev_succ = self.read_state_successor(prev_p);
1040            self.write_state(p, prev_sym, prev_freq, prev_succ);
1041            self.write_state(prev_p, p_sym, p_freq, p_succ);
1042            p = prev_p;
1043        }
1044        
1045        // Add 4 to first symbol's freq and SummFreq
1046        let first_freq = self.read_state_freq(stats as usize);
1047        self.write_state_freq(stats as usize, first_freq.saturating_add(4));
1048        let summ = self.read_context_summ_freq(ctx);
1049        self.write_context_summ_freq(ctx, summ.saturating_add(4));
1050        
1051        // Calculate EscFreq and Adder
1052        let new_first_freq = self.read_state_freq(stats as usize) as u32;
1053        let mut esc_freq = self.read_context_summ_freq(ctx) as i32 - new_first_freq as i32;
1054        let adder = if self.order_fall != 0 { 1 } else { 0 };
1055        
1056        // Halve first symbol's freq
1057        let halved = ((new_first_freq + adder) >> 1) as u8;
1058        self.write_state_freq(stats as usize, halved);
1059        let mut new_summ = halved as u16;
1060        
1061        // Halve all other frequencies, maintaining sorted order
1062        for i in 1..old_ns as usize {
1063            let state_ptr = stats as usize + i * 6;
1064            let freq = self.read_state_freq(state_ptr);
1065            esc_freq -= freq as i32;
1066            
1067            let halved = ((freq as u32 + adder) >> 1) as u8;
1068            self.write_state_freq(state_ptr, halved);
1069            new_summ += halved as u16;
1070            
1071            // Bubble up if needed (maintain sorted order by freq)
1072            if halved > self.read_state_freq(state_ptr - 6) {
1073                // Save current state
1074                let sym = self.read_state_symbol(state_ptr);
1075                let succ = self.read_state_successor(state_ptr);
1076                
1077                // Find insertion point
1078                let mut j = state_ptr - 6;
1079                while j >= stats as usize + 6 && halved > self.read_state_freq(j - 6) {
1080                    j -= 6;
1081                }
1082                
1083                // Shift states down
1084                let mut k = state_ptr;
1085                while k > j {
1086                    let prev_sym = self.read_state_symbol(k - 6);
1087                    let prev_freq = self.read_state_freq(k - 6);
1088                    let prev_succ = self.read_state_successor(k - 6);
1089                    self.write_state(k, prev_sym, prev_freq, prev_succ);
1090                    k -= 6;
1091                }
1092                
1093                // Insert at j
1094                self.write_state(j, sym, halved, succ);
1095            }
1096        }
1097        
1098        // Handle zero-frequency states (remove them)
1099        let mut num_zeros = 0;
1100        for i in (0..old_ns as usize).rev() {
1101            let state_ptr = stats as usize + i * 6;
1102            if self.read_state_freq(state_ptr) == 0 {
1103                num_zeros += 1;
1104            } else {
1105                break;
1106            }
1107        }
1108        
1109        if num_zeros > 0 {
1110            esc_freq += num_zeros;
1111            let new_ns = old_ns - num_zeros as u16;
1112            
1113            if new_ns == 1 {
1114                // Convert back to binary context
1115                let sym = self.read_state_symbol(stats as usize);
1116                let mut freq = self.read_state_freq(stats as usize);
1117                let succ = self.read_state_successor(stats as usize);
1118                
1119                // Halve freq until EscFreq <= 1
1120                while esc_freq > 1 {
1121                    freq = freq.saturating_sub(freq >> 1);
1122                    esc_freq >>= 1;
1123                }
1124                
1125                // Free the stats array
1126                let units = (old_ns as usize + 1) >> 1;
1127                self.sub_alloc.free_units(stats as usize, units);
1128                
1129                // Write OneState
1130                self.write_context_num_stats(ctx, 1);
1131                self.write_state(ctx + 2, sym, freq, succ);
1132                self.found_state = (ctx + 2) as u32;
1133                return;
1134            }
1135            
1136            self.write_context_num_stats(ctx, new_ns);
1137            
1138            // TODO: Shrink stats array if needed (requires shrink_units in allocator)
1139            // For now, we just leave the extra space allocated
1140        }
1141        
1142        // Update SummFreq with remaining escape frequency
1143        new_summ += (esc_freq - (esc_freq >> 1)) as u16;
1144        self.write_context_summ_freq(ctx, new_summ);
1145        
1146        // FoundState is now the first state
1147        let new_stats = self.read_context_stats(ctx);
1148        self.found_state = new_stats;
1149    }
1150
1151    /// Create successors for the current context chain.
1152    /// Returns the new context, or 0 on failure.
1153    fn create_successors(&mut self, skip: bool, p1: Option<usize>) -> u32 {
1154        let up_branch = self.read_state_successor(self.found_state as usize);
1155        let fs_symbol = self.read_state_symbol(self.found_state as usize);
1156        
1157        #[cfg(test)]
1158        if self.debug_count == 12 {
1159            eprintln!("[CS pos=12] Entry: skip={} p1={:?} up_branch={} fs_symbol='{}'",
1160                     skip, p1, up_branch, fs_symbol as char);
1161        }
1162        
1163        let mut pc = self.min_context;
1164        let mut ps: [usize; MAX_O] = [0; MAX_O];
1165        let mut pps_idx = 0;
1166        
1167        if !skip {
1168            ps[pps_idx] = self.found_state as usize;
1169            pps_idx += 1;
1170            let suffix = self.read_context_suffix(pc as usize);
1171            if suffix == 0 {
1172                // goto NO_LOOP
1173                if pps_idx == 0 {
1174                    return pc;
1175                }
1176                return self.create_successors_finish(pc, &ps, pps_idx, up_branch, fs_symbol);
1177            }
1178        }
1179        
1180        let mut p: usize;
1181        let mut start_in_loop = false;
1182        if let Some(p1_val) = p1 {
1183            p = p1_val;
1184            pc = self.read_context_suffix(pc as usize);
1185            start_in_loop = true;
1186        } else {
1187            p = 0; // Will be set in loop
1188        }
1189        
1190        // Main loop
1191        loop {
1192            if !start_in_loop {
1193                pc = self.read_context_suffix(pc as usize);
1194                if pc == 0 {
1195                    break;
1196                }
1197                
1198                let num_stats = self.read_context_num_stats(pc as usize);
1199                if num_stats != 1 {
1200                    let stats = self.read_context_stats(pc as usize);
1201                    p = stats as usize;
1202                    if self.read_state_symbol(p) != fs_symbol {
1203                        loop {
1204                            p += 6;
1205                            if self.read_state_symbol(p) == fs_symbol {
1206                                break;
1207                            }
1208                        }
1209                    }
1210                } else {
1211                    // OneState at context+2 (in union)
1212                    p = pc as usize + 2;
1213                }
1214            }
1215            start_in_loop = false; // Only skip to LOOP_ENTRY on first iteration
1216            
1217            // LOOP_ENTRY
1218            let p_successor = self.read_state_successor(p);
1219            if p_successor != up_branch {
1220                pc = p_successor;
1221                break;
1222            }
1223            
1224            if pps_idx >= MAX_O {
1225                return 0;
1226            }
1227            ps[pps_idx] = p;
1228            pps_idx += 1;
1229            
1230            let suffix = self.read_context_suffix(pc as usize);
1231            if suffix == 0 {
1232                break;
1233            }
1234        }
1235        
1236        self.create_successors_finish(pc, &ps, pps_idx, up_branch, fs_symbol)
1237    }
1238    
1239    fn create_successors_finish(&mut self, mut pc: u32, ps: &[usize; MAX_O], pps_idx: usize, up_branch: u32, fs_symbol: u8) -> u32 {
1240        #[cfg(test)]
1241        if self.debug_count == 12 {
1242            eprintln!("[CS_FINISH pos=12] pc={} pps_idx={} up_branch={} fs_symbol='{}'",
1243                     pc, pps_idx, up_branch, fs_symbol as char);
1244            for i in 0..pps_idx {
1245                let sym = self.read_state_symbol(ps[i]);
1246                eprintln!("[CS_FINISH pos=12] ps[{}]={} sym='{}'", i, ps[i], sym as char);
1247            }
1248        }
1249        
1250        // Suppress unused warning when not in test mode
1251        let _ = fs_symbol;
1252        
1253        if pps_idx == 0 {
1254            return pc;
1255        }
1256        
1257        // UpState.Symbol = *(byte*)UpBranch
1258        let up_state_symbol = self.sub_alloc.read_byte(up_branch as usize);
1259        // UpState.Successor = (byte*)UpBranch + 1
1260        let up_state_successor = up_branch + 1;
1261        
1262        let up_state_freq: u8;
1263        let num_stats = self.read_context_num_stats(pc as usize);
1264        if num_stats != 1 {
1265            let text_ptr = self.sub_alloc.get_text_ptr();
1266            if pc as usize <= text_ptr {
1267                return 0;
1268            }
1269            
1270            let stats = self.read_context_stats(pc as usize);
1271            let mut p = stats as usize;
1272            if self.read_state_symbol(p) != up_state_symbol {
1273                loop {
1274                    p += 6;
1275                    if self.read_state_symbol(p) == up_state_symbol {
1276                        break;
1277                    }
1278                }
1279            }
1280            
1281            let cf = self.read_state_freq(p) as u32 - 1;
1282            let s0 = self.read_context_summ_freq(pc as usize) as u32 - num_stats as u32 - cf;
1283            // unrar: UpState.Freq=1+((2*cf <= s0)?(5*cf > s0):((2*cf+3*s0-1)/(2*s0)));
1284            // Note: the 1+ applies to the entire expression!
1285            up_state_freq = (1 + if 2 * cf <= s0 {
1286                if 5 * cf > s0 { 1 } else { 0 }
1287            } else {
1288                (2 * cf + 3 * s0 - 1) / (2 * s0)
1289            }).min(255) as u8;
1290        } else {
1291            // OneState.Freq (at offset 2+1=3)
1292            up_state_freq = self.read_state_freq(pc as usize + 2);
1293        }
1294        
1295        // Create children in reverse order
1296        let mut i = pps_idx;
1297        while i > 0 {
1298            i -= 1;
1299            pc = self.create_child(pc, ps[i], up_state_symbol, up_state_freq, up_state_successor);
1300            if pc == 0 {
1301                return 0;
1302            }
1303        }
1304        
1305        pc
1306    }
1307
1308    fn update_model(&mut self) {
1309        #[cfg(test)]
1310        if self.debug_count == 11 || self.debug_count == 12 {
1311            eprintln!("[UPDATE pos={}] Before: min_context={} max_context={} order_fall={}", 
1312                     self.debug_count, self.min_context, self.max_context, self.order_fall);
1313        }
1314        
1315        // Read the found state
1316        let fs_symbol = self.read_state_symbol(self.found_state as usize);
1317        let fs_freq = self.read_state_freq(self.found_state as usize);
1318        let fs_successor = self.read_state_successor(self.found_state as usize);
1319        
1320        #[cfg(test)]
1321        if self.debug_count == 12 {
1322            eprintln!("[UPDATE pos=12] found_state={} fs_sym='{}' fs_freq={} fs_successor={}", 
1323                     self.found_state, fs_symbol as char, fs_freq, fs_successor);
1324            let text_ptr = self.sub_alloc.get_text_ptr();
1325            eprintln!("[UPDATE pos=12] text_ptr={}, fs_successor<=text_ptr: {}", 
1326                     text_ptr, (fs_successor as usize) <= text_ptr);
1327        }
1328        
1329        // Update frequency in parent context (suffix) and find p
1330        let mut p: Option<usize> = None;
1331        let suffix = self.read_context_suffix(self.min_context as usize);
1332        if suffix != 0 && (fs_freq as u32) < MAX_FREQ / 4 {
1333            let num_stats = self.read_context_num_stats(suffix as usize);
1334            if num_stats != 1 {
1335                // Find the symbol in parent's stats
1336                let stats = self.read_context_stats(suffix as usize);
1337                let mut state_ptr = stats as usize;
1338                if self.read_state_symbol(state_ptr) != fs_symbol {
1339                    loop {
1340                        state_ptr += 6;
1341                        if self.read_state_symbol(state_ptr) == fs_symbol {
1342                            break;
1343                        }
1344                    }
1345                    // Swap with previous if freq >= prev freq (move to front)
1346                    let freq = self.read_state_freq(state_ptr);
1347                    let prev_freq = self.read_state_freq(state_ptr - 6);
1348                    if freq >= prev_freq {
1349                        // Swap states
1350                        let prev_ptr = state_ptr - 6;
1351                        let curr_sym = self.read_state_symbol(state_ptr);
1352                        let curr_freq = self.read_state_freq(state_ptr);
1353                        let curr_succ = self.read_state_successor(state_ptr);
1354                        let prev_sym = self.read_state_symbol(prev_ptr);
1355                        let prev_freq = self.read_state_freq(prev_ptr);
1356                        let prev_succ = self.read_state_successor(prev_ptr);
1357                        self.write_state(state_ptr, prev_sym, prev_freq, prev_succ);
1358                        self.write_state(prev_ptr, curr_sym, curr_freq, curr_succ);
1359                        state_ptr = prev_ptr;
1360                    }
1361                }
1362                p = Some(state_ptr);
1363                let freq = self.read_state_freq(state_ptr);
1364                if (freq as u32) < MAX_FREQ - 9 {
1365                    self.write_state_freq(state_ptr, freq + 2);
1366                    let sf = self.read_context_summ_freq(suffix as usize);
1367                    self.write_context_summ_freq(suffix as usize, sf + 2);
1368                }
1369            } else {
1370                // Binary context - OneState at suffix+2 (in union)
1371                let one_state_ptr = suffix as usize + 2;
1372                p = Some(one_state_ptr);
1373                let freq = self.read_state_freq(one_state_ptr);
1374                if freq < 32 {
1375                    self.write_state_freq(one_state_ptr, freq + 1);
1376                }
1377            }
1378        }
1379        
1380        // If order_fall == 0, just create successors and return
1381        if self.order_fall == 0 {
1382            let new_ctx = self.create_successors(true, p);
1383            if new_ctx == 0 {
1384                self.restart_model();
1385                return;
1386            }
1387            self.write_state_successor(self.found_state as usize, new_ctx);
1388            self.min_context = new_ctx;
1389            self.max_context = new_ctx;
1390            return;
1391        }
1392        
1393        // Write symbol to text memory
1394        let text_ptr = self.sub_alloc.get_text_ptr();
1395        let units_start = self.sub_alloc.get_units_start();
1396        if text_ptr >= units_start {
1397            self.restart_model();
1398            return;
1399        }
1400        self.sub_alloc.write_byte(text_ptr, fs_symbol);
1401        self.sub_alloc.advance_text_ptr();
1402        
1403        let mut successor = self.sub_alloc.get_text_ptr() as u32;
1404        
1405        // fs_successor_new tracks what we'll use for max/min context at the end
1406        let fs_successor_new: u32;
1407        
1408        if fs_successor != 0 {
1409            let text_ptr = self.sub_alloc.get_text_ptr();
1410            if (fs_successor as usize) <= text_ptr {
1411                let new_succ = self.create_successors(false, p);
1412                if new_succ == 0 {
1413                    self.restart_model();
1414                    return;
1415                }
1416                self.write_state_successor(self.found_state as usize, new_succ);
1417                fs_successor_new = new_succ;
1418            } else {
1419                fs_successor_new = fs_successor;
1420            }
1421            self.order_fall -= 1;
1422            if self.order_fall == 0 {
1423                // Update successor to use fs.Successor instead of text pointer
1424                successor = fs_successor_new;
1425                if self.max_context != self.min_context {
1426                    // Undo text ptr advance
1427                    self.sub_alloc.retreat_text_ptr();
1428                }
1429                // NOTE: Don't return early! Continue to expansion loop.
1430                // This is the key fix - unrar doesn't return here either.
1431            }
1432        } else {
1433            // First time seeing this symbol in this context chain
1434            self.write_state_successor(self.found_state as usize, successor);
1435            // fs.Successor = MinContext (for the final assignment)
1436            fs_successor_new = self.min_context;
1437        }
1438        
1439        // Add symbol to contexts from max_context to min_context
1440        let ns = self.read_context_num_stats(self.min_context as usize) as u32;
1441        let summ_freq = self.read_context_summ_freq(self.min_context as usize) as u32;
1442        let s0 = summ_freq.saturating_sub(ns).saturating_sub(fs_freq as u32).saturating_add(1);
1443        
1444        let mut pc = self.max_context;
1445        while pc != self.min_context {
1446            let ns1 = self.read_context_num_stats(pc as usize);
1447            
1448            if ns1 != 1 {
1449                // Multi-symbol context - expand if needed
1450                if (ns1 & 1) == 0 {
1451                    // Need to expand stats array
1452                    let old_stats = self.read_context_stats(pc as usize);
1453                    let new_stats = self.sub_alloc.expand_units(old_stats as usize, (ns1 >> 1) as usize);
1454                    if new_stats.is_none() {
1455                        self.restart_model();
1456                        return;
1457                    }
1458                    self.write_context_stats(pc as usize, new_stats.unwrap() as u32);
1459                }
1460                
1461                // Update summ_freq based on symbol distribution
1462                let mut sf_inc = 0u16;
1463                if 2 * ns1 < ns as u16 { sf_inc += 1; }
1464                let summ = self.read_context_summ_freq(pc as usize);
1465                if 4 * ns1 as u32 <= ns && summ <= 8 * ns1 { sf_inc += 2; }
1466                self.write_context_summ_freq(pc as usize, summ + sf_inc);
1467            } else {
1468                // Binary context - convert to multi-symbol
1469                let new_stats = self.sub_alloc.alloc_units(1);
1470                if new_stats.is_none() {
1471                    self.restart_model();
1472                    return;
1473                }
1474                let new_stats = new_stats.unwrap();
1475                
1476                // Copy OneState (at offset 2) to new stats
1477                let one_state_sym = self.read_state_symbol(pc as usize + 2);
1478                let one_state_freq = self.read_state_freq(pc as usize + 2);
1479                let one_state_succ = self.read_state_successor(pc as usize + 2);
1480                self.write_state(new_stats, one_state_sym, one_state_freq, one_state_succ);
1481                
1482                self.write_context_stats(pc as usize, new_stats as u32);
1483                
1484                // Update freq
1485                let freq = self.read_state_freq(new_stats);
1486                let new_freq = if (freq as u32) < MAX_FREQ / 4 - 1 {
1487                    freq * 2
1488                } else {
1489                    (MAX_FREQ - 4) as u8
1490                };
1491                self.write_state_freq(new_stats, new_freq);
1492                
1493                // Set summ_freq - use self.init_esc (dynamic, set during binary escape)
1494                let init_esc_extra = if ns > 3 { 1 } else { 0 };
1495                let new_summ = new_freq as u16 + self.init_esc as u16 + init_esc_extra as u16;
1496                self.write_context_summ_freq(pc as usize, new_summ);
1497                #[cfg(test)]
1498                if pc == 15728580 {
1499                    eprintln!("[UPDATE_MODEL pos={}] context {} promoted: new_freq={} init_esc={} init_esc_extra={} → SummFreq={}", 
1500                             self.debug_count, pc, new_freq, self.init_esc, init_esc_extra, new_summ);
1501                }
1502            }
1503            
1504            // Calculate new symbol's frequency
1505            let summ = self.read_context_summ_freq(pc as usize) as u32;
1506            let cf = 2 * fs_freq as u32 * (summ + 6);
1507            let sf = s0 + summ;
1508            
1509            let sym_freq: u8;
1510            if cf < 6 * sf {
1511                sym_freq = 1 + (cf > sf) as u8 + (cf >= 4 * sf) as u8;
1512                let summ = self.read_context_summ_freq(pc as usize);
1513                self.write_context_summ_freq(pc as usize, summ + 3);
1514                #[cfg(test)]
1515                if pc == 15728580 {
1516                    eprintln!("[UPDATE_MODEL pos={}] context {} SummFreq: {} → {} (branch1, sym_freq={})", 
1517                             self.debug_count, pc, summ, summ + 3, sym_freq);
1518                }
1519            } else {
1520                sym_freq = 4 + (cf >= 9 * sf) as u8 + (cf >= 12 * sf) as u8 + (cf >= 15 * sf) as u8;
1521                let summ = self.read_context_summ_freq(pc as usize);
1522                self.write_context_summ_freq(pc as usize, summ + sym_freq as u16);
1523                #[cfg(test)]
1524                if pc == 15728580 {
1525                    eprintln!("[UPDATE_MODEL pos={}] context {} SummFreq: {} → {} (branch2, sym_freq={})", 
1526                             self.debug_count, pc, summ, summ + sym_freq as u16, sym_freq);
1527                }
1528            }
1529            
1530            // Add new symbol at end of stats
1531            let stats = self.read_context_stats(pc as usize);
1532            let new_state_ptr = stats as usize + (ns1 as usize) * 6;
1533            
1534            #[cfg(test)]
1535            if ns1 >= 256 {
1536                eprintln!("[UPDATE] ERROR: Adding state at ns1={} to context {} - exceeds 256!", ns1, pc);
1537            }
1538            
1539            self.write_state(new_state_ptr, fs_symbol, sym_freq, successor);
1540            
1541            // Increment NumStats
1542            self.write_context_num_stats(pc as usize, ns1 + 1);
1543            
1544            // Move to suffix
1545            pc = self.read_context_suffix(pc as usize);
1546        }
1547        
1548        // Update context pointers to fs.Successor
1549        self.max_context = fs_successor_new;
1550        self.min_context = fs_successor_new;
1551        
1552        #[cfg(test)]
1553        if self.debug_count == 11 || self.debug_count == 12 {
1554            eprintln!("[UPDATE pos={}] After: min_context={} max_context={}", 
1555                     self.debug_count, self.min_context, self.max_context);
1556        }
1557    }
1558
1559    /// Clear the character mask.
1560    fn clear_mask(&mut self) {
1561        self.esc_count = 1;
1562        self.char_mask = [0; 256];
1563    }
1564
1565    // Helper methods for reading/writing context and state structures
1566
1567    fn read_context_num_stats(&self, offset: usize) -> u16 {
1568        self.sub_alloc.read_u16(offset)
1569    }
1570
1571    fn write_context_num_stats(&mut self, offset: usize, val: u16) {
1572        self.sub_alloc.write_u16(offset, val);
1573    }
1574
1575    fn read_context_summ_freq(&self, offset: usize) -> u16 {
1576        self.sub_alloc.read_u16(offset + 2)
1577    }
1578
1579    fn write_context_summ_freq(&mut self, offset: usize, val: u16) {
1580        self.sub_alloc.write_u16(offset + 2, val);
1581    }
1582
1583    fn read_context_stats(&self, offset: usize) -> u32 {
1584        let b0 = self.sub_alloc.read_byte(offset + 4) as u32;
1585        let b1 = self.sub_alloc.read_byte(offset + 5) as u32;
1586        let b2 = self.sub_alloc.read_byte(offset + 6) as u32;
1587        let b3 = self.sub_alloc.read_byte(offset + 7) as u32;
1588        b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)
1589    }
1590
1591    fn write_context_stats(&mut self, offset: usize, val: u32) {
1592        self.sub_alloc.write_byte(offset + 4, val as u8);
1593        self.sub_alloc.write_byte(offset + 5, (val >> 8) as u8);
1594        self.sub_alloc.write_byte(offset + 6, (val >> 16) as u8);
1595        self.sub_alloc.write_byte(offset + 7, (val >> 24) as u8);
1596    }
1597
1598    fn read_context_suffix(&self, offset: usize) -> u32 {
1599        let b0 = self.sub_alloc.read_byte(offset + 8) as u32;
1600        let b1 = self.sub_alloc.read_byte(offset + 9) as u32;
1601        let b2 = self.sub_alloc.read_byte(offset + 10) as u32;
1602        let b3 = self.sub_alloc.read_byte(offset + 11) as u32;
1603        b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)
1604    }
1605
1606    fn write_context_suffix(&mut self, offset: usize, val: u32) {
1607        self.sub_alloc.write_byte(offset + 8, val as u8);
1608        self.sub_alloc.write_byte(offset + 9, (val >> 8) as u8);
1609        self.sub_alloc.write_byte(offset + 10, (val >> 16) as u8);
1610        self.sub_alloc.write_byte(offset + 11, (val >> 24) as u8);
1611    }
1612
1613    fn read_context_one_state(&self, offset: usize) -> State {
1614        // OneState is at offset 2 (in the union with SummFreq+Stats)
1615        // Layout: Symbol(1) + Freq(1) + Successor(4) = 6 bytes at offset 2-7
1616        State {
1617            symbol: self.sub_alloc.read_byte(offset + 2),
1618            freq: self.sub_alloc.read_byte(offset + 3),
1619            successor: {
1620                let b0 = self.sub_alloc.read_byte(offset + 4) as u32;
1621                let b1 = self.sub_alloc.read_byte(offset + 5) as u32;
1622                let b2 = self.sub_alloc.read_byte(offset + 6) as u32;
1623                let b3 = self.sub_alloc.read_byte(offset + 7) as u32;
1624                b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)
1625            },
1626        }
1627    }
1628
1629    fn write_context_one_state_freq(&mut self, offset: usize, freq: u8) {
1630        // OneState.Freq is at offset+3 (Symbol at +2, Freq at +3)
1631        self.sub_alloc.write_byte(offset + 3, freq);
1632    }
1633
1634    fn read_state_symbol(&self, offset: usize) -> u8 {
1635        self.sub_alloc.read_byte(offset)
1636    }
1637
1638    fn read_state_freq(&self, offset: usize) -> u8 {
1639        self.sub_alloc.read_byte(offset + 1)
1640    }
1641
1642    fn write_state_freq(&mut self, offset: usize, freq: u8) {
1643        self.sub_alloc.write_byte(offset + 1, freq);
1644    }
1645
1646    fn read_state_successor(&self, offset: usize) -> u32 {
1647        let b0 = self.sub_alloc.read_byte(offset + 2) as u32;
1648        let b1 = self.sub_alloc.read_byte(offset + 3) as u32;
1649        let b2 = self.sub_alloc.read_byte(offset + 4) as u32;
1650        let b3 = self.sub_alloc.read_byte(offset + 5) as u32;
1651        b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)
1652    }
1653
1654    fn write_state_successor(&mut self, offset: usize, successor: u32) {
1655        self.sub_alloc.write_byte(offset + 2, successor as u8);
1656        self.sub_alloc.write_byte(offset + 3, (successor >> 8) as u8);
1657        self.sub_alloc.write_byte(offset + 4, (successor >> 16) as u8);
1658        self.sub_alloc.write_byte(offset + 5, (successor >> 24) as u8);
1659    }
1660
1661    fn write_state(&mut self, offset: usize, symbol: u8, freq: u8, successor: u32) {
1662        self.sub_alloc.write_byte(offset, symbol);
1663        self.sub_alloc.write_byte(offset + 1, freq);
1664        self.sub_alloc.write_byte(offset + 2, successor as u8);
1665        self.sub_alloc.write_byte(offset + 3, (successor >> 8) as u8);
1666        self.sub_alloc.write_byte(offset + 4, (successor >> 16) as u8);
1667        self.sub_alloc.write_byte(offset + 5, (successor >> 24) as u8);
1668    }
1669}
1670
1671impl Default for PpmModel {
1672    fn default() -> Self {
1673        Self::new()
1674    }
1675}