1use crate::address::AddressValue;
2
3pub struct DataHeuristics {
4 pub block_size: usize,
6
7 pub row_alignment: RowAlignment,
9
10 pub min_repeat_rows: Option<usize>,
13
14 pub interior: Vec<RunRule>,
16
17 pub leading: Option<EdgeTrim>,
19
20 pub trailing: Option<EdgeTrim>,
22
23 pub all_zero_single_ds_min: usize,
26
27 pub min_aligned_blocks: Option<usize>,
30
31 pub zero_skip_windowed: bool,
34}
35
36pub enum RowAlignment {
37 Packed,
39 Block,
41 Global,
43}
44
45#[derive(Clone)]
46pub struct RunRule {
47 pub value: Option<u8>,
50 pub min_run: Option<usize>,
53}
54
55#[derive(Clone)]
56pub struct EdgeTrim {
57 pub rules: Vec<RunRule>,
60 pub reserve: usize,
63}
64
65pub enum Edge {
66 Leading,
67 Trailing,
68}
69
70impl Default for DataHeuristics {
71 fn default() -> Self {
72 Self {
73 block_size: 16,
74 row_alignment: RowAlignment::Block,
75 min_repeat_rows: None,
76 interior: vec![
77 RunRule {
78 value: Some(0x00),
79 min_run: Some(8),
80 },
81 RunRule {
82 value: Some(0xFF),
83 min_run: Some(64),
84 },
85 ],
86 leading: None,
87 trailing: Some(EdgeTrim {
88 rules: vec![
89 RunRule {
90 value: Some(0xFF),
91 min_run: Some(16),
92 },
93 RunRule {
94 value: Some(0x00),
95 min_run: Some(4),
96 },
97 ],
98 reserve: 4,
99 }),
100 all_zero_single_ds_min: 8,
101 min_aligned_blocks: None,
102 zero_skip_windowed: false,
103 }
104 }
105}
106
107#[derive(Debug, Clone, Copy, PartialEq, Eq)]
108enum RuleDecision {
109 Compress,
110 ForceLiteral,
111 NoMatch,
112}
113
114impl DataHeuristics {
115 pub fn iterate<'a>(
116 &self,
117 address: AddressValue,
118 edge: Option<Edge>,
119 bytes: &'a [u8],
120 ) -> impl Iterator<Item = DataChunk<'a, u8>> {
121 self.segment(address, edge, bytes).into_iter()
122 }
123
124 pub fn literal_rows<'a>(&self, address: AddressValue, bytes: &'a [u8]) -> Vec<&'a [u8]> {
126 if bytes.is_empty() {
127 return Vec::new();
128 }
129 let bs = self.block_size.max(1);
130 match self.row_alignment {
131 RowAlignment::Packed | RowAlignment::Block => bytes.chunks(bs).map(|row| row).collect(),
132 RowAlignment::Global => {
133 let mut rows = Vec::new();
134 let mut i = 0;
135 let start_mod = (address as usize) % bs;
136 if start_mod != 0 {
137 let first_len = (bs - start_mod).min(bytes.len());
138 rows.push(&bytes[0..first_len]);
139 i = first_len;
140 }
141 while i < bytes.len() {
142 let end = (i + bs).min(bytes.len());
143 rows.push(&bytes[i..end]);
144 i = end;
145 }
146 rows
147 }
148 }
149 }
150
151 fn segment<'a>(
152 &self,
153 _address: AddressValue,
154 _edge: Option<Edge>,
155 bytes: &'a [u8],
156 ) -> Vec<DataChunk<'a, u8>> {
157 let mut out = Vec::new();
158 let len = bytes.len();
159 if len == 0 {
160 return out;
161 }
162
163 let head = self.leading.as_ref().map_or(0, |e| e.reserve).min(len);
165 let tail = self
166 .trailing
167 .as_ref()
168 .map_or(0, |e| e.reserve)
169 .min(len - head);
170 let win_start = head;
171 let win_end = len - tail;
172
173 let mut lit_start = 0usize; let mut i = win_start;
175
176 while i < win_end {
177 let value = bytes[i];
178 let mut j = i + 1;
179 while j < win_end && bytes[j] == value {
180 j += 1;
181 }
182 let run_len = j - i;
183 let at_start = i == win_start;
184 let at_end = j == win_end;
185
186 if self.should_compress(value, run_len, at_start, at_end) {
187 push_literal(&mut out, bytes, lit_start, i);
188 out.push(DataChunk::Run(value, run_len));
189 i = j;
190 lit_start = j;
191 continue;
192 }
193
194 if let Some(count) = self.block_run_at(bytes, i, win_end) {
195 let unit_len = self.block_size;
196 push_literal(&mut out, bytes, lit_start, i);
197 out.push(DataChunk::BlockRun(&bytes[i..i + unit_len], count));
198 i += unit_len * count;
199 lit_start = i;
200 continue;
201 }
202
203 i = j;
205 }
206
207 push_literal(&mut out, bytes, lit_start, len);
209 out
210 }
211
212 fn should_compress(&self, value: u8, run_len: usize, at_start: bool, at_end: bool) -> bool {
213 let decide = |rules: &[RunRule]| rule_decision(rules, value, run_len);
214
215 if at_start {
218 if let Some(edge) = &self.leading {
219 match decide(&edge.rules) {
220 RuleDecision::Compress => return true,
221 RuleDecision::ForceLiteral => return false,
222 RuleDecision::NoMatch => {}
223 }
224 }
225 }
226 if at_end {
227 if let Some(edge) = &self.trailing {
228 match decide(&edge.rules) {
229 RuleDecision::Compress => return true,
230 RuleDecision::ForceLiteral => return false,
231 RuleDecision::NoMatch => {}
232 }
233 }
234 }
235 matches!(decide(&self.interior), RuleDecision::Compress)
236 }
237
238 fn block_run_at(&self, bytes: &[u8], i: usize, end: usize) -> Option<usize> {
239 let min_rows = self.min_repeat_rows?;
240 let bs = self.block_size;
241 if bs == 0 || min_rows == 0 || i + bs.checked_mul(min_rows)? > end {
242 return None;
243 }
244 let unit = &bytes[i..i + bs];
245 let mut count = 1;
246 let mut pos = i + bs;
247 while pos + bs <= end && &bytes[pos..pos + bs] == unit {
248 count += 1;
249 pos += bs;
250 }
251 (count >= min_rows).then_some(count)
252 }
253}
254
255fn rule_decision(rules: &[RunRule], value: u8, run_len: usize) -> RuleDecision {
256 for rule in rules.iter().filter(|r| r.value == Some(value)) {
258 if let Some(min_run) = rule.min_run {
259 if run_len >= min_run {
260 return RuleDecision::Compress;
261 }
262 } else {
263 return RuleDecision::ForceLiteral;
264 }
265 }
266 for rule in rules.iter().filter(|r| r.value.is_none()) {
267 if let Some(min_run) = rule.min_run {
268 if run_len >= min_run {
269 return RuleDecision::Compress;
270 }
271 } else {
272 return RuleDecision::ForceLiteral;
273 }
274 }
275 RuleDecision::NoMatch
276}
277
278fn push_literal<'a>(out: &mut Vec<DataChunk<'a, u8>>, bytes: &'a [u8], start: usize, end: usize) {
279 if start < end {
280 out.push(DataChunk::Literal(&bytes[start..end]));
281 }
282}
283
284#[derive(Debug, Clone, PartialEq, Eq)]
286pub enum DataChunk<'a, T> {
287 Literal(&'a [T]),
289 Run(T, usize),
291 BlockRun(&'a [T], usize),
293}
294
295#[cfg(test)]
296mod tests {
297 use super::*;
298 use DataChunk::*;
299
300 fn rule(value: Option<u8>, min_run: usize) -> RunRule {
301 RunRule {
302 value,
303 min_run: Some(min_run),
304 }
305 }
306
307 fn rule_literal(value: Option<u8>) -> RunRule {
308 RunRule {
309 value,
310 min_run: None,
311 }
312 }
313
314 fn heur(
315 interior: Vec<RunRule>,
316 leading: Option<EdgeTrim>,
317 trailing: Option<EdgeTrim>,
318 ) -> DataHeuristics {
319 DataHeuristics {
320 block_size: 4,
321 interior,
322 leading,
323 trailing,
324 ..Default::default()
325 }
326 }
327
328 fn run<'a>(h: &DataHeuristics, b: &'a [u8]) -> Vec<DataChunk<'a, u8>> {
329 h.iterate(0, None, b).collect()
330 }
331
332 #[test]
333 fn empty_yields_nothing() {
334 assert!(run(&heur(vec![], None, None), &[]).is_empty());
335 }
336
337 #[test]
338 fn all_literal_is_one_maximal_span() {
339 let h = heur(vec![rule(Some(0x00), 4)], None, None);
340 assert_eq!(run(&h, &[1, 2, 3, 4, 5]), vec![Literal(&[1, 2, 3, 4, 5])]);
341 }
342
343 #[test]
344 fn interior_run_compresses_and_splits_literals() {
345 let h = heur(vec![rule(Some(0x00), 4)], None, None);
346 assert_eq!(
347 run(&h, &[1, 2, 0, 0, 0, 0, 0, 3]),
348 vec![Literal(&[1, 2]), Run(0x00, 5), Literal(&[3])]
349 );
350 }
351
352 #[test]
353 fn run_below_min_stays_literal() {
354 let h = heur(vec![rule(Some(0x00), 4)], None, None);
355 assert_eq!(run(&h, &[1, 0, 0, 2]), vec![Literal(&[1, 0, 0, 2])]);
356 }
357
358 #[test]
359 fn min_run_boundary_for_repeat() {
360 let h = heur(vec![rule(Some(0xFF), 8)], None, None);
361 assert_eq!(
363 run(&h, &[1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2]),
364 vec![Literal(&[1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2])]
365 );
366 assert_eq!(
368 run(&h, &[1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2]),
369 vec![Literal(&[1]), Run(0xFF, 8), Literal(&[2])]
370 );
371 }
372
373 #[test]
374 fn catch_all_matches_any_value() {
375 let h = heur(vec![rule(None, 4)], None, None);
376 assert_eq!(run(&h, &[0x42, 0x42, 0x42, 0x42, 0x42]), vec![Run(0x42, 5)]);
377 }
378
379 #[test]
380 fn literal_rule_short_circuits_catch_all() {
381 let h = heur(vec![rule_literal(Some(0x20)), rule(None, 1)], None, None);
382 assert_eq!(run(&h, &[0x20; 6]), vec![Literal(&[0x20; 6])]);
383 }
384
385 #[test]
386 fn trailing_reserve_preserves_checksum() {
387 let trailing = EdgeTrim {
388 rules: vec![rule(Some(0xFF), 4)],
389 reserve: 2,
390 };
391 let h = heur(vec![], None, Some(trailing));
392 let bytes = [10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xAA, 0xBB];
393 assert_eq!(
394 run(&h, &bytes),
395 vec![Literal(&[10, 11]), Run(0xFF, 6), Literal(&[0xAA, 0xBB])]
396 );
397 }
398
399 #[test]
400 fn leading_reserve_preserves_signature() {
401 let leading = EdgeTrim {
402 rules: vec![rule(Some(0x00), 2)],
403 reserve: 2,
404 };
405 let h = heur(vec![], Some(leading), None);
406 let bytes = [0xAA, 0x55, 0, 0, 0, 0, 10];
407 assert_eq!(
408 run(&h, &bytes),
409 vec![Literal(&[0xAA, 0x55]), Run(0x00, 4), Literal(&[10])]
410 );
411 }
412
413 #[test]
414 fn edge_rule_relaxes_interior_threshold() {
415 let interior = vec![rule(Some(0xFF), 64)];
416 let trailing = EdgeTrim {
417 rules: vec![rule(Some(0xFF), 4)],
418 reserve: 0,
419 };
420 let h = heur(interior, None, Some(trailing));
421 assert_eq!(
423 run(&h, &[1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF]),
424 vec![Literal(&[1]), Run(0xFF, 5)]
425 );
426 }
427
428 #[test]
429 fn literal_rows_pack_by_block_size() {
430 let h = DataHeuristics::default();
431 let bytes = (0..40).collect::<Vec<_>>();
432 let rows = h.literal_rows(0, &bytes);
433 assert_eq!(rows.len(), 3);
434 assert_eq!(rows[0].len(), 16);
435 assert_eq!(rows[1].len(), 16);
436 assert_eq!(rows[2].len(), 8);
437 }
438
439 #[test]
440 fn literal_rows_global_aligns_to_address() {
441 let mut h = DataHeuristics::default();
442 h.row_alignment = RowAlignment::Global;
443 let bytes = (0..20).collect::<Vec<_>>();
444 let rows = h.literal_rows(4, &bytes);
445 assert_eq!(rows.len(), 2);
446 assert_eq!(rows[0].len(), 12);
447 assert_eq!(rows[1].len(), 8);
448 }
449
450 #[test]
451 fn block_run_folds_repeated_unit() {
452 let mut h = heur(vec![], None, None);
453 h.block_size = 2;
454 h.min_repeat_rows = Some(3);
455 assert_eq!(
456 run(&h, &[0xDE, 0xAD, 0xDE, 0xAD, 0xDE, 0xAD]),
457 vec![BlockRun(&[0xDE, 0xAD], 3)]
458 );
459 }
460
461 #[test]
462 fn single_value_run_wins_over_block_run() {
463 let mut h = heur(vec![rule(Some(0xFF), 4)], None, None);
464 h.block_size = 2;
465 h.min_repeat_rows = Some(2);
466 assert_eq!(run(&h, &[0xFF; 6]), vec![Run(0xFF, 6)]);
467 }
468}