Skip to main content

datacortex_core/model/
sparse_model.rs

1//! SparseModel -- skip-context model for periodic patterns.
2//!
3//! Phase 4: Captures patterns like column-aligned data, repeating structures,
4//! and periodic byte patterns by using contexts that skip bytes.
5//!
6//! Two contexts:
7//! - Gap-2: hash(c2, c0_partial) -- skips c1, catches every-other-byte patterns
8//! - Gap-3: hash(c3, c1, c0_partial) -- skips c2, catches 3-byte periodic patterns
9
10use crate::state::context_map::ContextMap;
11use crate::state::state_map::StateMap;
12use crate::state::state_table::StateTable;
13
/// 32-bit FNV prime: the multiplier applied after each byte is XORed into the hash.
const FNV_PRIME: u32 = 0x0100_0193;
/// 32-bit FNV offset basis: the starting value the per-context seeds are derived from.
const FNV_OFFSET: u32 = 0x811C_9DC5;
17
18/// Sparse context model using skip-byte contexts.
19pub struct SparseModel {
20    /// Gap-2 model: context = (c2, c0_partial), skips c1.
21    cmap_gap2: ContextMap,
22    smap_gap2: StateMap,
23    last_state_gap2: u8,
24    last_hash_gap2: u32,
25
26    /// Gap-3 model: context = (c3, c1, c0_partial), skips c2.
27    cmap_gap3: ContextMap,
28    smap_gap3: StateMap,
29    last_state_gap3: u8,
30    last_hash_gap3: u32,
31}
32
33impl SparseModel {
34    /// Create a sparse model with default 16MB total (8MB per gap context).
35    pub fn new() -> Self {
36        Self::with_size(1 << 23) // 8MB per gap context = 16MB total
37    }
38
39    /// Create a sparse model with a custom ContextMap size per gap context (in bytes).
40    /// Total memory is 2x this value.
41    pub fn with_size(cmap_size: usize) -> Self {
42        SparseModel {
43            cmap_gap2: ContextMap::new(cmap_size),
44            smap_gap2: StateMap::new(),
45            last_state_gap2: 0,
46            last_hash_gap2: 0,
47
48            cmap_gap3: ContextMap::new(cmap_size),
49            smap_gap3: StateMap::new(),
50            last_state_gap3: 0,
51            last_hash_gap3: 0,
52        }
53    }
54
55    /// Predict: returns average of gap-2 and gap-3 predictions.
56    #[inline]
57    pub fn predict(&mut self, c0: u32, c1: u8, c2: u8, c3: u8) -> u32 {
58        // Gap-2 context: skip c1
59        let h2 = gap2_hash(c2, c0);
60        let state2 = self.cmap_gap2.get(h2);
61        self.last_state_gap2 = state2;
62        self.last_hash_gap2 = h2;
63        let p2 = self.smap_gap2.predict(state2);
64
65        // Gap-3 context: skip c2
66        let h3 = gap3_hash(c3, c1, c0);
67        let state3 = self.cmap_gap3.get(h3);
68        self.last_state_gap3 = state3;
69        self.last_hash_gap3 = h3;
70        let p3 = self.smap_gap3.predict(state3);
71
72        // Blend: average in probability space
73        ((p2 + p3) / 2).clamp(1, 4095)
74    }
75
76    /// Update after observing bit.
77    #[inline]
78    pub fn update(&mut self, bit: u8) {
79        // Update gap-2
80        self.smap_gap2.update(self.last_state_gap2, bit);
81        let new2 = StateTable::next(self.last_state_gap2, bit);
82        self.cmap_gap2.set(self.last_hash_gap2, new2);
83
84        // Update gap-3
85        self.smap_gap3.update(self.last_state_gap3, bit);
86        let new3 = StateTable::next(self.last_state_gap3, bit);
87        self.cmap_gap3.set(self.last_hash_gap3, new3);
88    }
89}
90
91impl Default for SparseModel {
92    fn default() -> Self {
93        Self::new()
94    }
95}
96
97#[inline]
98fn gap2_hash(c2: u8, c0: u32) -> u32 {
99    let mut h = FNV_OFFSET ^ 0xDEAD; // different seed from order hashes
100    h ^= c2 as u32;
101    h = h.wrapping_mul(FNV_PRIME);
102    h ^= c0 & 0xFF;
103    h = h.wrapping_mul(FNV_PRIME);
104    h
105}
106
107#[inline]
108fn gap3_hash(c3: u8, c1: u8, c0: u32) -> u32 {
109    let mut h = FNV_OFFSET ^ 0xBEEF; // different seed
110    h ^= c3 as u32;
111    h = h.wrapping_mul(FNV_PRIME);
112    h ^= c1 as u32;
113    h = h.wrapping_mul(FNV_PRIME);
114    h ^= c0 & 0xFF;
115    h = h.wrapping_mul(FNV_PRIME);
116    h
117}
118
#[cfg(test)]
mod tests {
    use super::SparseModel;

    /// A freshly constructed model is expected to return exactly 2048.
    #[test]
    fn initial_prediction_balanced() {
        let mut model = SparseModel::new();
        assert_eq!(model.predict(1, 0, 0, 0), 2048);
    }

    /// Predictions stay inside `1..=4095` while the model is trained on
    /// alternating bits with shifting byte contexts.
    #[test]
    fn predictions_in_range() {
        let mut model = SparseModel::new();
        for step in 0u8..50 {
            let prediction = model.predict(1, step, step + 1, step + 2);
            assert!(matches!(prediction, 1..=4095));
            model.update(step & 1);
        }
    }
}