Skip to main content

datacortex_core/state/
context_map.rs

1//! ContextMap -- hash table mapping context hashes to states.
2//!
3//! Phase 4: Three variants:
4//! 1. Simple lossy ContextMap (1 byte per entry, for low-order models)
5//! 2. ChecksumContextMap (2 bytes: checksum + state, reduces collisions)
6//! 3. AssociativeContextMap (4 bytes: 2 slots with checksums, lowest collision)
7
8use super::state_table::StateTable;
9
10/// Simple lossy hash table mapping context hashes to state bytes.
11///
12/// Each entry is a single byte (state from StateTable).
13/// On collision, the existing entry is silently overwritten.
14pub struct ContextMap {
15    /// State entries indexed by (hash & mask).
16    table: Vec<u8>,
17    /// Bitmask for indexing: size - 1.
18    mask: usize,
19}
20
21impl ContextMap {
22    /// Create a new ContextMap with `size` entries.
23    /// `size` must be a power of 2.
24    pub fn new(size: usize) -> Self {
25        debug_assert!(size.is_power_of_two(), "ContextMap size must be power of 2");
26        ContextMap {
27            table: vec![0u8; size],
28            mask: size - 1,
29        }
30    }
31
32    /// Get the current state for a context hash.
33    #[inline(always)]
34    pub fn get(&self, hash: u32) -> u8 {
35        self.table[hash as usize & self.mask]
36    }
37
38    /// Set the state for a context hash.
39    #[inline(always)]
40    pub fn set(&mut self, hash: u32, state: u8) {
41        self.table[hash as usize & self.mask] = state;
42    }
43
44    /// Get state, then update it after observing `bit`.
45    /// Returns the state before transition.
46    /// Also transitions the state in-place.
47    #[inline]
48    pub fn predict_and_update(&mut self, hash: u32, bit: u8) -> u8 {
49        let idx = hash as usize & self.mask;
50        let state = self.table[idx];
51        self.table[idx] = StateTable::next(state, bit);
52        state
53    }
54}
55
56/// Checksummed context map: 2 bytes per entry (checksum + state).
57///
58/// The checksum byte is derived from the upper bits of the hash.
59/// On lookup, if the checksum doesn't match, the entry is treated as state 0.
60pub struct ChecksumContextMap {
61    /// Interleaved [checksum, state] pairs.
62    table: Vec<u8>,
63    /// Bitmask for indexing: (size/2) - 1.
64    mask: usize,
65}
66
67impl ChecksumContextMap {
68    /// Create a checksummed ContextMap.
69    /// `byte_size` is the total memory in bytes (must be power of 2).
70    pub fn new(byte_size: usize) -> Self {
71        debug_assert!(
72            byte_size.is_power_of_two(),
73            "ChecksumContextMap size must be power of 2"
74        );
75        let entries = byte_size / 2;
76        ChecksumContextMap {
77            table: vec![0u8; byte_size],
78            mask: entries - 1,
79        }
80    }
81
82    /// Extract checksum byte from hash.
83    #[inline(always)]
84    fn checksum(hash: u32) -> u8 {
85        ((hash >> 16) as u8) | 1 // ensure non-zero
86    }
87
88    /// Get state for hash, checking checksum.
89    #[inline(always)]
90    pub fn get(&self, hash: u32) -> u8 {
91        let idx = (hash as usize & self.mask) * 2;
92        let stored_cs = self.table[idx];
93        let expected_cs = Self::checksum(hash);
94        if stored_cs == expected_cs {
95            self.table[idx + 1]
96        } else {
97            0
98        }
99    }
100
101    /// Set state for hash with checksum.
102    #[inline(always)]
103    pub fn set(&mut self, hash: u32, state: u8) {
104        let idx = (hash as usize & self.mask) * 2;
105        self.table[idx] = Self::checksum(hash);
106        self.table[idx + 1] = state;
107    }
108}
109
110/// 2-way set-associative context map: 4 bytes per set (2 slots).
111///
112/// Each set has 2 slots, each with a checksum byte and state byte.
113/// On lookup, both slots are checked. On write, the slot matching the
114/// checksum is updated, or the least-used slot is replaced.
115/// This dramatically reduces collision damage for high-order models.
116pub struct AssociativeContextMap {
117    /// [cs0, state0, cs1, state1] per set.
118    table: Vec<u8>,
119    /// Bitmask for set indexing: (size/4) - 1.
120    mask: usize,
121}
122
123impl AssociativeContextMap {
124    /// Create a 2-way associative ContextMap.
125    /// `byte_size` is the total memory (must be power of 2, at least 8).
126    pub fn new(byte_size: usize) -> Self {
127        debug_assert!(
128            byte_size.is_power_of_two() && byte_size >= 8,
129            "AssociativeContextMap size must be power of 2 and >= 8"
130        );
131        let sets = byte_size / 4;
132        AssociativeContextMap {
133            table: vec![0u8; byte_size],
134            mask: sets - 1,
135        }
136    }
137
138    /// Extract checksum byte from hash.
139    #[inline(always)]
140    fn checksum(hash: u32) -> u8 {
141        ((hash >> 16) as u8) | 1
142    }
143
144    /// Get state for hash. Checks both slots.
145    #[inline(always)]
146    pub fn get(&self, hash: u32) -> u8 {
147        let base = (hash as usize & self.mask) * 4;
148        let cs = Self::checksum(hash);
149
150        // Check slot 0
151        if self.table[base] == cs {
152            return self.table[base + 1];
153        }
154        // Check slot 1
155        if self.table[base + 2] == cs {
156            return self.table[base + 3];
157        }
158        // Not found
159        0
160    }
161
162    /// Set state for hash. Updates matching slot or replaces slot 1 (LRU-ish).
163    #[inline(always)]
164    pub fn set(&mut self, hash: u32, state: u8) {
165        let base = (hash as usize & self.mask) * 4;
166        let cs = Self::checksum(hash);
167
168        // Check if slot 0 matches
169        if self.table[base] == cs {
170            self.table[base + 1] = state;
171            return;
172        }
173        // Check if slot 1 matches
174        if self.table[base + 2] == cs {
175            self.table[base + 3] = state;
176            return;
177        }
178        // Neither matches: evict slot 1 (move slot 0 to slot 1 first for LRU)
179        self.table[base + 2] = self.table[base];
180        self.table[base + 3] = self.table[base + 1];
181        self.table[base] = cs;
182        self.table[base + 1] = state;
183    }
184}
185
186#[cfg(test)]
187mod tests {
188    use super::*;
189
190    #[test]
191    fn new_entries_are_zero() {
192        let cm = ContextMap::new(1024);
193        assert_eq!(cm.get(0), 0);
194        assert_eq!(cm.get(999), 0);
195    }
196
197    #[test]
198    fn set_and_get() {
199        let mut cm = ContextMap::new(1024);
200        cm.set(42, 128);
201        assert_eq!(cm.get(42), 128);
202    }
203
204    #[test]
205    fn hash_masking() {
206        let mut cm = ContextMap::new(256);
207        cm.set(0, 10);
208        assert_eq!(cm.get(256), 10);
209    }
210
211    #[test]
212    fn predict_and_update_transitions() {
213        let mut cm = ContextMap::new(1024);
214        let state = cm.predict_and_update(42, 1);
215        assert_eq!(state, 0);
216        let new_state = cm.get(42);
217        assert_ne!(new_state, 0);
218    }
219
220    #[test]
221    fn lossy_collision() {
222        let mut cm = ContextMap::new(256);
223        cm.set(5, 100);
224        cm.set(5 + 256, 200);
225        assert_eq!(cm.get(5), 200);
226    }
227
228    // Checksummed ContextMap tests
229
230    #[test]
231    fn checksum_new_entries_return_zero() {
232        let cm = ChecksumContextMap::new(2048);
233        assert_eq!(cm.get(0), 0);
234        assert_eq!(cm.get(12345), 0);
235    }
236
237    #[test]
238    fn checksum_set_and_get() {
239        let mut cm = ChecksumContextMap::new(2048);
240        cm.set(42, 128);
241        assert_eq!(cm.get(42), 128);
242    }
243
244    #[test]
245    fn checksum_overwrites_properly() {
246        let mut cm = ChecksumContextMap::new(2048);
247        cm.set(42, 100);
248        cm.set(42, 200);
249        assert_eq!(cm.get(42), 200);
250    }
251
252    // Associative ContextMap tests
253
254    #[test]
255    fn assoc_new_entries_return_zero() {
256        let cm = AssociativeContextMap::new(4096);
257        assert_eq!(cm.get(0), 0);
258        assert_eq!(cm.get(12345), 0);
259    }
260
261    #[test]
262    fn assoc_set_and_get() {
263        let mut cm = AssociativeContextMap::new(4096);
264        cm.set(42, 128);
265        assert_eq!(cm.get(42), 128);
266    }
267
268    #[test]
269    fn assoc_two_entries_same_set() {
270        let mut cm = AssociativeContextMap::new(16); // 4 sets
271        // Two different hashes that map to same set but different checksums
272        cm.set(0x00010000, 100); // checksum derived from upper bits
273        cm.set(0x00020000, 200); // different checksum, might be same set
274        // Both should be retrievable (2-way associative)
275        assert_eq!(cm.get(0x00010000), 100);
276        assert_eq!(cm.get(0x00020000), 200);
277    }
278
279    #[test]
280    fn assoc_overwrites_properly() {
281        let mut cm = AssociativeContextMap::new(4096);
282        cm.set(42, 100);
283        cm.set(42, 200);
284        assert_eq!(cm.get(42), 200);
285    }
286}