Skip to main content

i8051_disassembler/render/
data.rs

1use crate::address::AddressValue;
2
3pub struct DataHeuristics {
4    /// Bytes per `.db` row; also the unit for alignment and row-repeat.
5    pub block_size: usize,
6
7    /// How literal rows break for display; does not affect byte layout.
8    pub row_alignment: RowAlignment,
9
10    /// Collapse N+ identical block_size rows into one `.rept` (hexdump `*`).
11    /// None disables multi-byte row folding.
12    pub min_repeat_rows: Option<usize>,
13
14    /// RLE rules applied to runs in the body of the span.
15    pub interior: Vec<RunRule>,
16
17    /// Trim at the start of a mapped chunk (None = leave leading bytes literal).
18    pub leading: Option<EdgeTrim>,
19
20    /// Trim at the end of a mapped chunk (None = leave trailing bytes literal).
21    pub trailing: Option<EdgeTrim>,
22
23    /// When the entire span is zeros and length is at least this value, emit one
24    /// skip run for the whole span. `usize::MAX` disables the fast path.
25    pub all_zero_single_ds_min: usize,
26
27    /// When set, run compression only applies to the address-aligned middle of a
28    /// homogeneous run spanning at least this many `block_size` blocks.
29    pub min_aligned_blocks: Option<usize>,
30
31    /// Emit zero skip runs as multiple `.ds block_size` lines instead of one
32    /// `.ds N` for the full run length.
33    pub zero_skip_windowed: bool,
34}
35
36pub enum RowAlignment {
37    /// Pack `block_size` bytes from the start of each literal span.
38    Packed,
39    /// Align rows to the mapped chunk start.
40    Block,
41    /// Align rows to absolute address (xxd-style).
42    Global,
43}
44
45#[derive(Clone)]
46pub struct RunRule {
47    /// `Some(v)` matches a run of value `v`; `None` is the catch-all
48    /// (any repeated byte), evaluated only after all `Some` rules.
49    pub value: Option<u8>,
50    /// Minimum run length before this rule fires; shorter runs stay inline.
51    /// `None` means always literal.
52    pub min_run: Option<usize>,
53}
54
55#[derive(Clone)]
56pub struct EdgeTrim {
57    /// Run rules for this edge — may use different values / lower thresholds
58    /// than `interior` (edge padding is unambiguous, so trim sooner).
59    pub rules: Vec<RunRule>,
60    /// Literal bytes kept flush against the boundary, outside the fill run.
61    /// This is the DOS-ROM tail: e.g. 4 for a u32 checksum, 2 for a u16 sig.
62    pub reserve: usize,
63}
64
65pub enum Edge {
66    Leading,
67    Trailing,
68}
69
70impl Default for DataHeuristics {
71    fn default() -> Self {
72        Self {
73            block_size: 16,
74            row_alignment: RowAlignment::Block,
75            min_repeat_rows: None,
76            interior: vec![
77                RunRule {
78                    value: Some(0x00),
79                    min_run: Some(8),
80                },
81                RunRule {
82                    value: Some(0xFF),
83                    min_run: Some(64),
84                },
85            ],
86            leading: None,
87            trailing: Some(EdgeTrim {
88                rules: vec![
89                    RunRule {
90                        value: Some(0xFF),
91                        min_run: Some(16),
92                    },
93                    RunRule {
94                        value: Some(0x00),
95                        min_run: Some(4),
96                    },
97                ],
98                reserve: 4,
99            }),
100            all_zero_single_ds_min: 8,
101            min_aligned_blocks: None,
102            zero_skip_windowed: false,
103        }
104    }
105}
106
107#[derive(Debug, Clone, Copy, PartialEq, Eq)]
108enum RuleDecision {
109    Compress,
110    ForceLiteral,
111    NoMatch,
112}
113
114impl DataHeuristics {
115    pub fn iterate<'a>(
116        &self,
117        address: AddressValue,
118        edge: Option<Edge>,
119        bytes: &'a [u8],
120    ) -> impl Iterator<Item = DataChunk<'a, u8>> {
121        self.segment(address, edge, bytes).into_iter()
122    }
123
124    /// Split a literal span into `.db` rows per `block_size` and `row_alignment`.
125    pub fn literal_rows<'a>(&self, address: AddressValue, bytes: &'a [u8]) -> Vec<&'a [u8]> {
126        if bytes.is_empty() {
127            return Vec::new();
128        }
129        let bs = self.block_size.max(1);
130        match self.row_alignment {
131            RowAlignment::Packed | RowAlignment::Block => bytes.chunks(bs).map(|row| row).collect(),
132            RowAlignment::Global => {
133                let mut rows = Vec::new();
134                let mut i = 0;
135                let start_mod = (address as usize) % bs;
136                if start_mod != 0 {
137                    let first_len = (bs - start_mod).min(bytes.len());
138                    rows.push(&bytes[0..first_len]);
139                    i = first_len;
140                }
141                while i < bytes.len() {
142                    let end = (i + bs).min(bytes.len());
143                    rows.push(&bytes[i..end]);
144                    i = end;
145                }
146                rows
147            }
148        }
149    }
150
151    fn segment<'a>(
152        &self,
153        _address: AddressValue,
154        _edge: Option<Edge>,
155        bytes: &'a [u8],
156    ) -> Vec<DataChunk<'a, u8>> {
157        let mut out = Vec::new();
158        let len = bytes.len();
159        if len == 0 {
160            return out;
161        }
162
163        // reserve carves literal head/tail; runs are only detected in the window
164        let head = self.leading.as_ref().map_or(0, |e| e.reserve).min(len);
165        let tail = self
166            .trailing
167            .as_ref()
168            .map_or(0, |e| e.reserve)
169            .min(len - head);
170        let win_start = head;
171        let win_end = len - tail;
172
173        let mut lit_start = 0usize; // pending literal span start (absolute, incl. reserved head)
174        let mut i = win_start;
175
176        while i < win_end {
177            let value = bytes[i];
178            let mut j = i + 1;
179            while j < win_end && bytes[j] == value {
180                j += 1;
181            }
182            let run_len = j - i;
183            let at_start = i == win_start;
184            let at_end = j == win_end;
185
186            if self.should_compress(value, run_len, at_start, at_end) {
187                push_literal(&mut out, bytes, lit_start, i);
188                out.push(DataChunk::Run(value, run_len));
189                i = j;
190                lit_start = j;
191                continue;
192            }
193
194            if let Some(count) = self.block_run_at(bytes, i, win_end) {
195                let unit_len = self.block_size;
196                push_literal(&mut out, bytes, lit_start, i);
197                out.push(DataChunk::BlockRun(&bytes[i..i + unit_len], count));
198                i += unit_len * count;
199                lit_start = i;
200                continue;
201            }
202
203            // equal-run wasn't compressible: leave it in the pending literal span
204            i = j;
205        }
206
207        // flush trailing in-window literal + reserved tail as one span
208        push_literal(&mut out, bytes, lit_start, len);
209        out
210    }
211
212    fn should_compress(&self, value: u8, run_len: usize, at_start: bool, at_end: bool) -> bool {
213        let decide = |rules: &[RunRule]| rule_decision(rules, value, run_len);
214
215        // edge rule sets (relaxed thresholds) take precedence over interior;
216        // a matching `Literal` rule short-circuits to "don't compress"
217        if at_start {
218            if let Some(edge) = &self.leading {
219                match decide(&edge.rules) {
220                    RuleDecision::Compress => return true,
221                    RuleDecision::ForceLiteral => return false,
222                    RuleDecision::NoMatch => {}
223                }
224            }
225        }
226        if at_end {
227            if let Some(edge) = &self.trailing {
228                match decide(&edge.rules) {
229                    RuleDecision::Compress => return true,
230                    RuleDecision::ForceLiteral => return false,
231                    RuleDecision::NoMatch => {}
232                }
233            }
234        }
235        matches!(decide(&self.interior), RuleDecision::Compress)
236    }
237
238    fn block_run_at(&self, bytes: &[u8], i: usize, end: usize) -> Option<usize> {
239        let min_rows = self.min_repeat_rows?;
240        let bs = self.block_size;
241        if bs == 0 || min_rows == 0 || i + bs.checked_mul(min_rows)? > end {
242            return None;
243        }
244        let unit = &bytes[i..i + bs];
245        let mut count = 1;
246        let mut pos = i + bs;
247        while pos + bs <= end && &bytes[pos..pos + bs] == unit {
248            count += 1;
249            pos += bs;
250        }
251        (count >= min_rows).then_some(count)
252    }
253}
254
255fn rule_decision(rules: &[RunRule], value: u8, run_len: usize) -> RuleDecision {
256    // exact-value rules before the `None` catch-all
257    for rule in rules.iter().filter(|r| r.value == Some(value)) {
258        if let Some(min_run) = rule.min_run {
259            if run_len >= min_run {
260                return RuleDecision::Compress;
261            }
262        } else {
263            return RuleDecision::ForceLiteral;
264        }
265    }
266    for rule in rules.iter().filter(|r| r.value.is_none()) {
267        if let Some(min_run) = rule.min_run {
268            if run_len >= min_run {
269                return RuleDecision::Compress;
270            }
271        } else {
272            return RuleDecision::ForceLiteral;
273        }
274    }
275    RuleDecision::NoMatch
276}
277
278fn push_literal<'a>(out: &mut Vec<DataChunk<'a, u8>>, bytes: &'a [u8], start: usize, end: usize) {
279    if start < end {
280        out.push(DataChunk::Literal(&bytes[start..end]));
281    }
282}
283
284/// A processed chunk of data.
285#[derive(Debug, Clone, PartialEq, Eq)]
286pub enum DataChunk<'a, T> {
287    /// A row sequence of literals.
288    Literal(&'a [T]),
289    /// A run of a single value.
290    Run(T, usize),
291    /// A run of a single value that is a multiple of the block size.
292    BlockRun(&'a [T], usize),
293}
294
295#[cfg(test)]
296mod tests {
297    use super::*;
298    use DataChunk::*;
299
300    fn rule(value: Option<u8>, min_run: usize) -> RunRule {
301        RunRule {
302            value,
303            min_run: Some(min_run),
304        }
305    }
306
307    fn rule_literal(value: Option<u8>) -> RunRule {
308        RunRule {
309            value,
310            min_run: None,
311        }
312    }
313
314    fn heur(
315        interior: Vec<RunRule>,
316        leading: Option<EdgeTrim>,
317        trailing: Option<EdgeTrim>,
318    ) -> DataHeuristics {
319        DataHeuristics {
320            block_size: 4,
321            interior,
322            leading,
323            trailing,
324            ..Default::default()
325        }
326    }
327
328    fn run<'a>(h: &DataHeuristics, b: &'a [u8]) -> Vec<DataChunk<'a, u8>> {
329        h.iterate(0, None, b).collect()
330    }
331
332    #[test]
333    fn empty_yields_nothing() {
334        assert!(run(&heur(vec![], None, None), &[]).is_empty());
335    }
336
337    #[test]
338    fn all_literal_is_one_maximal_span() {
339        let h = heur(vec![rule(Some(0x00), 4)], None, None);
340        assert_eq!(run(&h, &[1, 2, 3, 4, 5]), vec![Literal(&[1, 2, 3, 4, 5])]);
341    }
342
343    #[test]
344    fn interior_run_compresses_and_splits_literals() {
345        let h = heur(vec![rule(Some(0x00), 4)], None, None);
346        assert_eq!(
347            run(&h, &[1, 2, 0, 0, 0, 0, 0, 3]),
348            vec![Literal(&[1, 2]), Run(0x00, 5), Literal(&[3])]
349        );
350    }
351
352    #[test]
353    fn run_below_min_stays_literal() {
354        let h = heur(vec![rule(Some(0x00), 4)], None, None);
355        assert_eq!(run(&h, &[1, 0, 0, 2]), vec![Literal(&[1, 0, 0, 2])]);
356    }
357
358    #[test]
359    fn min_run_boundary_for_repeat() {
360        let h = heur(vec![rule(Some(0xFF), 8)], None, None);
361        // 7 bytes: below min, whole thing literal
362        assert_eq!(
363            run(&h, &[1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2]),
364            vec![Literal(&[1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2])]
365        );
366        // 8 bytes: compresses
367        assert_eq!(
368            run(&h, &[1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2]),
369            vec![Literal(&[1]), Run(0xFF, 8), Literal(&[2])]
370        );
371    }
372
373    #[test]
374    fn catch_all_matches_any_value() {
375        let h = heur(vec![rule(None, 4)], None, None);
376        assert_eq!(run(&h, &[0x42, 0x42, 0x42, 0x42, 0x42]), vec![Run(0x42, 5)]);
377    }
378
379    #[test]
380    fn literal_rule_short_circuits_catch_all() {
381        let h = heur(vec![rule_literal(Some(0x20)), rule(None, 1)], None, None);
382        assert_eq!(run(&h, &[0x20; 6]), vec![Literal(&[0x20; 6])]);
383    }
384
385    #[test]
386    fn trailing_reserve_preserves_checksum() {
387        let trailing = EdgeTrim {
388            rules: vec![rule(Some(0xFF), 4)],
389            reserve: 2,
390        };
391        let h = heur(vec![], None, Some(trailing));
392        let bytes = [10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xAA, 0xBB];
393        assert_eq!(
394            run(&h, &bytes),
395            vec![Literal(&[10, 11]), Run(0xFF, 6), Literal(&[0xAA, 0xBB])]
396        );
397    }
398
399    #[test]
400    fn leading_reserve_preserves_signature() {
401        let leading = EdgeTrim {
402            rules: vec![rule(Some(0x00), 2)],
403            reserve: 2,
404        };
405        let h = heur(vec![], Some(leading), None);
406        let bytes = [0xAA, 0x55, 0, 0, 0, 0, 10];
407        assert_eq!(
408            run(&h, &bytes),
409            vec![Literal(&[0xAA, 0x55]), Run(0x00, 4), Literal(&[10])]
410        );
411    }
412
413    #[test]
414    fn edge_rule_relaxes_interior_threshold() {
415        let interior = vec![rule(Some(0xFF), 64)];
416        let trailing = EdgeTrim {
417            rules: vec![rule(Some(0xFF), 4)],
418            reserve: 0,
419        };
420        let h = heur(interior, None, Some(trailing));
421        // interior min 64 would not fire, trailing min 4 does
422        assert_eq!(
423            run(&h, &[1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF]),
424            vec![Literal(&[1]), Run(0xFF, 5)]
425        );
426    }
427
428    #[test]
429    fn literal_rows_pack_by_block_size() {
430        let h = DataHeuristics::default();
431        let bytes = (0..40).collect::<Vec<_>>();
432        let rows = h.literal_rows(0, &bytes);
433        assert_eq!(rows.len(), 3);
434        assert_eq!(rows[0].len(), 16);
435        assert_eq!(rows[1].len(), 16);
436        assert_eq!(rows[2].len(), 8);
437    }
438
439    #[test]
440    fn literal_rows_global_aligns_to_address() {
441        let mut h = DataHeuristics::default();
442        h.row_alignment = RowAlignment::Global;
443        let bytes = (0..20).collect::<Vec<_>>();
444        let rows = h.literal_rows(4, &bytes);
445        assert_eq!(rows.len(), 2);
446        assert_eq!(rows[0].len(), 12);
447        assert_eq!(rows[1].len(), 8);
448    }
449
450    #[test]
451    fn block_run_folds_repeated_unit() {
452        let mut h = heur(vec![], None, None);
453        h.block_size = 2;
454        h.min_repeat_rows = Some(3);
455        assert_eq!(
456            run(&h, &[0xDE, 0xAD, 0xDE, 0xAD, 0xDE, 0xAD]),
457            vec![BlockRun(&[0xDE, 0xAD], 3)]
458        );
459    }
460
461    #[test]
462    fn single_value_run_wins_over_block_run() {
463        let mut h = heur(vec![rule(Some(0xFF), 4)], None, None);
464        h.block_size = 2;
465        h.min_repeat_rows = Some(2);
466        assert_eq!(run(&h, &[0xFF; 6]), vec![Run(0xFF, 6)]);
467    }
468}