// datacortex_core/model/run_model.rs
1//! RunModel -- run-length context model.
2//!
3//! Phase 4: Detects and exploits byte-level runs (sequences of identical bytes).
4//! Also tracks bit-level run lengths for finer prediction.
5//!
6//! Context: (last byte, run length quantized, partial byte).
7//! Very effective on repetitive data (logs, JSON values, etc.)
8
9use crate::state::context_map::ContextMap;
10use crate::state::state_map::StateMap;
11use crate::state::state_table::StateTable;
12
/// Run model: predicts based on run length of identical bytes.
///
/// Hashes a (quantized run length, last byte, partial byte) context into the
/// [`ContextMap`], and maps the stored state to a probability via [`StateMap`].
pub struct RunModel {
    /// Context map for run context.
    cmap: ContextMap,
    /// State map.
    smap: StateMap,
    /// Current byte-level run length.
    run_len: u32,
    /// Last complete byte.
    last_byte: u8,
    /// Previous last byte (for detecting new runs).
    /// NOTE(review): written in `update_run_state` but never read anywhere in
    /// this file — confirm whether it is reserved for a future heuristic or is
    /// dead state.
    prev_byte: u8,
    /// Last state.
    last_state: u8,
    /// Last hash.
    last_hash: u32,
}
30
31impl RunModel {
32    /// Create a run model with default 4MB ContextMap.
33    pub fn new() -> Self {
34        Self::with_size(1 << 22) // 4MB
35    }
36
37    /// Create a run model with a custom ContextMap size (in bytes).
38    pub fn with_size(cmap_size: usize) -> Self {
39        RunModel {
40            cmap: ContextMap::new(cmap_size),
41            smap: StateMap::new(),
42            run_len: 0,
43            last_byte: 0,
44            prev_byte: 0,
45            last_state: 0,
46            last_hash: 0,
47        }
48    }
49
50    /// Predict based on run context.
51    /// `c0`: partial byte (1-255).
52    /// `bpos`: bit position (0-7).
53    /// `c1`: last completed byte.
54    #[inline]
55    pub fn predict(&mut self, c0: u32, bpos: u8, c1: u8) -> u32 {
56        if bpos == 0 {
57            self.update_run_state(c1);
58        }
59
60        // Context: run_len_quantized(3b) + c1(8b) + c0_partial(8b)
61        let run_q = quantize_run(self.run_len);
62        let mut h: u32 = 0x12345678;
63        h = h.wrapping_mul(0x01000193) ^ run_q as u32;
64        h = h.wrapping_mul(0x01000193) ^ c1 as u32;
65        h = h.wrapping_mul(0x01000193) ^ (c0 & 0xFF);
66
67        let state = self.cmap.get(h);
68        self.last_state = state;
69        self.last_hash = h;
70        self.smap.predict(state)
71    }
72
73    /// Update after observing bit.
74    #[inline]
75    pub fn update(&mut self, bit: u8) {
76        self.smap.update(self.last_state, bit);
77        let new_state = StateTable::next(self.last_state, bit);
78        self.cmap.set(self.last_hash, new_state);
79    }
80
81    /// Update run tracking.
82    fn update_run_state(&mut self, c1: u8) {
83        if c1 == self.last_byte {
84            self.run_len += 1;
85        } else {
86            self.run_len = 1;
87        }
88        self.prev_byte = self.last_byte;
89        self.last_byte = c1;
90    }
91}
92
93impl Default for RunModel {
94    fn default() -> Self {
95        Self::new()
96    }
97}
98
/// Quantize run length into a 3-bit bucket (0-7).
///
/// Buckets grow roughly logarithmically: short runs get their own bucket,
/// longer runs share progressively wider ones, and everything past 32 maps
/// to 7.
#[inline]
fn quantize_run(len: u32) -> u8 {
    if len <= 1 {
        0
    } else if len <= 3 {
        (len - 1) as u8 // 2 -> 1, 3 -> 2
    } else if len <= 5 {
        3
    } else if len <= 8 {
        4
    } else if len <= 16 {
        5
    } else if len <= 32 {
        6
    } else {
        7
    }
}
113
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh model with no history should sit at the midpoint (2048/4096).
    #[test]
    fn initial_prediction_balanced() {
        let mut model = RunModel::new();
        assert_eq!(model.predict(1, 0, 0), 2048);
    }

    /// Predictions must stay inside the valid probability range (1..=4095)
    /// across a stream of varying bytes and alternating bit updates.
    #[test]
    fn predictions_in_range() {
        let mut model = RunModel::new();
        for byte in 0..50u32 {
            let p = model.predict(1, 0, byte as u8);
            assert!(p >= 1 && p <= 4095);
            model.update((byte & 1) as u8);
        }
    }
}
134}