Skip to main content

i8051_disassembler/
region.rs

1use i8051::{ControlFlow, Instruction};
2use std::collections::BTreeMap;
3use std::ops::{Bound, RangeBounds};
4use std::range::Range;
5
6use crate::address::{
7    AddressSpace, AddressValue, PhysicalAddr, Xref, branch_target, branch_target_operand_index,
8    xrefs_from_instruction, xrefs_to_target,
9};
10use crate::command::Command;
11use crate::db::{
12    Equivalent, EquivalentAt, EquivalentKind, EquivalentRange, Error, Function, OperandOverride,
13    SpaceUsage,
14};
15use crate::labels::{ImplicitLabels, LabelCollector, Labels};
16use crate::pattern::BytePattern;
17use crate::render::Line;
18
19#[derive(Debug, Clone)]
20pub enum ByteRange {
21    Mapped(String, usize, Vec<u8>),
22    Constant(AddressValue, u8),
23}
24
25impl ByteRange {
26    pub fn len(&self) -> AddressValue {
27        match self {
28            ByteRange::Mapped(_, _, data) => data.len() as AddressValue,
29            ByteRange::Constant(size, _) => *size,
30        }
31    }
32}
33
/// Everything known about one address space: its bytes plus the annotations
/// (equivalents, labels, comments, functions) attached to individual offsets.
///
/// Every map is keyed by the start offset of the entry.
pub struct Region {
    // Backing bytes, either mapped from a file or a constant fill.
    byte_ranges: BTreeMap<AddressValue, ByteRange>,
    // Code/data interpretations; each entry records its own end offset.
    equivalents: BTreeMap<AddressValue, EquivalentRange>,
    // Explicit label names.
    labels: BTreeMap<AddressValue, String>,
    // Comment text.
    comments: BTreeMap<AddressValue, String>,
    // Function records, keyed by `function.addr.offset`.
    functions: BTreeMap<AddressValue, Function>,
}
41
42impl Region {
43    pub fn new() -> Self {
44        Self {
45            byte_ranges: BTreeMap::new(),
46            equivalents: BTreeMap::new(),
47            labels: BTreeMap::new(),
48            comments: BTreeMap::new(),
49            functions: BTreeMap::new(),
50        }
51    }
52
    /// Alias for [`Region::map_bytes`]: forwards all arguments unchanged.
    pub fn set_bytes(
        &mut self,
        file: &str,
        file_offset: usize,
        offset: AddressValue,
        bytes: &[u8],
    ) {
        self.map_bytes(file, file_offset, offset, bytes);
    }
62
63    pub fn map_bytes(
64        &mut self,
65        file: &str,
66        file_offset: usize,
67        offset: AddressValue,
68        bytes: &[u8],
69    ) {
70        if !bytes.is_empty() {
71            self.clear_bytes(offset, bytes.len() as AddressValue);
72        }
73        self.byte_ranges.insert(
74            offset,
75            ByteRange::Mapped(file.to_string(), file_offset, bytes.to_vec()),
76        );
77    }
78
79    pub fn set_constant(&mut self, offset: AddressValue, size: AddressValue, value: u8) {
80        if size == 0 {
81            return;
82        }
83        self.clear_bytes(offset, size);
84        self.byte_ranges
85            .insert(offset, ByteRange::Constant(size, value));
86    }
87
    /// Searches the whole region for `pattern`; shorthand for
    /// `find_bytes_in(pattern, ..)`.
    pub fn find_bytes(&self, pattern: &BytePattern) -> impl Iterator<Item = Range<AddressValue>> {
        self.find_bytes_in(pattern, ..)
    }
91
92    /// Find bytes in a specific range (`..` searches the whole region).
93    ///
94    /// Cross-byte-range matches and constant-byte-range matches do not
95    /// currently work.
96    pub fn find_bytes_in(
97        &self,
98        pattern: &BytePattern,
99        range: impl RangeBounds<AddressValue>,
100    ) -> impl Iterator<Item = Range<AddressValue>> {
101        let range_start_inclusive = match range.start_bound() {
102            Bound::Included(addr) => *addr,
103            Bound::Excluded(addr) => addr.saturating_add(1),
104            Bound::Unbounded => 0,
105        };
106        let range_end_inclusive = match range.end_bound() {
107            Bound::Included(addr) => *addr,
108            Bound::Excluded(addr) => addr.saturating_sub(1),
109            Bound::Unbounded => AddressValue::MAX,
110        };
111        self.byte_ranges
112            .iter()
113            .map(move |(addr, range)| match range {
114                ByteRange::Mapped(_, _, data) => {
115                    if ranges_overlap_inclusive(
116                        range_start_inclusive,
117                        range_end_inclusive,
118                        *addr,
119                        *addr + data.len() as AddressValue,
120                    ) {
121                        let data_start = range_start_inclusive.saturating_sub(*addr) as usize;
122                        let data_end = range_end_inclusive
123                            .saturating_sub(*addr)
124                            .min((data.len() - 1) as AddressValue)
125                            as usize;
126                        Some(
127                            pattern
128                                .find_all(&data[data_start..=data_end])
129                                .map(move |range| {
130                                    Range::from(
131                                        range
132                                            .start
133                                            .saturating_add(data_start)
134                                            .saturating_add(*addr as usize)
135                                            ..range
136                                                .end
137                                                .saturating_add(data_start)
138                                                .saturating_add(*addr as usize),
139                                    )
140                                }),
141                        )
142                    } else {
143                        None
144                    }
145                }
146                ByteRange::Constant(..) => None,
147            })
148            .flatten()
149            .flatten()
150            .map(|range| Range::from(range.start as AddressValue..range.end as AddressValue))
151    }
152
153    pub(crate) fn snapshot_byte_ranges(
154        &self,
155        offset: AddressValue,
156        size: AddressValue,
157    ) -> Vec<(AddressValue, ByteRange)> {
158        if size == 0 {
159            return Vec::new();
160        }
161        let end = offset.saturating_add(size);
162        self.byte_ranges
163            .iter()
164            .filter(|(start, range)| range.len() > offset && **start < end)
165            .map(|(start, range)| (*start as AddressValue, range.clone()))
166            .collect()
167    }
168
169    pub fn clear_bytes(&mut self, offset: AddressValue, size: AddressValue) {
170        if size == 0 {
171            return;
172        }
173        let end = offset.saturating_add(size);
174        let mut kept = BTreeMap::new();
175
176        for (&start, range) in &self.byte_ranges {
177            match range {
178                ByteRange::Mapped(file, file_offset, data) => {
179                    let range_end = start.saturating_add(data.len() as AddressValue);
180                    if range_end <= offset || start >= end {
181                        kept.insert(start, range.clone());
182                        continue;
183                    }
184                    if start < offset {
185                        let keep_len = offset - start;
186                        kept.insert(
187                            start,
188                            ByteRange::Mapped(
189                                file.clone(),
190                                *file_offset,
191                                data[..keep_len as usize].to_vec(),
192                            ),
193                        );
194                    }
195                    if range_end > end {
196                        let skip = end.saturating_sub(start);
197                        kept.insert(
198                            end,
199                            ByteRange::Mapped(
200                                file.clone(),
201                                file_offset.saturating_add(skip as usize),
202                                data[skip as _..].to_vec(),
203                            ),
204                        );
205                    }
206                }
207                ByteRange::Constant(count, value) => {
208                    let range_end = start.saturating_add(*count);
209                    if range_end <= offset || start >= end {
210                        kept.insert(start, range.clone());
211                        continue;
212                    }
213                    if start < offset {
214                        kept.insert(start, ByteRange::Constant(offset - start, *value));
215                    }
216                    if range_end > end {
217                        kept.insert(end, ByteRange::Constant(range_end - end, *value));
218                    }
219                }
220            }
221        }
222
223        self.byte_ranges = kept;
224    }
225
226    pub fn set_equivalent(
227        &mut self,
228        offset: AddressValue,
229        equivalent: Equivalent,
230    ) -> Result<&EquivalentRange, Error> {
231        if self.has_equivalent(offset) {
232            return Err(Error::NotUndefined(offset));
233        }
234
235        let span = self.equivalent_span(offset, &equivalent)?;
236        self.validate_equivalent_bounds(offset, span)?;
237        self.validate_no_equivalent_overlap(offset, span)?;
238
239        self.equivalents.insert(
240            offset,
241            EquivalentRange {
242                end: offset.saturating_add(span),
243                equivalent,
244            },
245        );
246        Ok(&self.equivalents[&offset])
247    }
248
249    pub fn clear_equivalents(&mut self, offset: AddressValue, size: AddressValue) {
250        if size == 0 {
251            return;
252        }
253        let end = offset.saturating_add(size);
254        self.equivalents
255            .retain(|&start, range| range.end <= offset || start >= end);
256    }
257
258    pub fn snapshot_equivalents(
259        &self,
260        offset: AddressValue,
261        size: AddressValue,
262    ) -> Vec<(AddressValue, EquivalentRange)> {
263        if size == 0 {
264            return Vec::new();
265        }
266        let end = offset.saturating_add(size);
267        self.equivalents
268            .iter()
269            .filter(|(start, range)| ranges_overlap(**start, range.end, offset, end))
270            .map(|(&start, range)| (start, range.clone()))
271            .collect()
272    }
273
274    pub fn has_equivalent(&self, offset: AddressValue) -> bool {
275        if self.equivalents.contains_key(&offset) {
276            return true;
277        }
278        if let Some((&_, range)) = self.equivalents.range(..=offset).next_back() {
279            return offset < range.end;
280        }
281        return false;
282    }
283
284    pub fn has_equivalent_exact(&self, offset: AddressValue) -> bool {
285        if self.equivalents.contains_key(&offset) {
286            return true;
287        }
288        return false;
289    }
290
291    pub fn get_equivalent_kind(&self, offset: AddressValue) -> Option<EquivalentKind> {
292        if let Some(range) = self.equivalents.get(&offset) {
293            return Some(range.equivalent.kind());
294        }
295        if let Some((&_, range)) = self.equivalents.range(..=offset).next_back() {
296            if offset < range.end {
297                return Some(range.equivalent.kind());
298            }
299        }
300        return None;
301    }
302
303    pub fn get_equivalent_kind_exact(&self, offset: AddressValue) -> Option<EquivalentKind> {
304        if let Some(range) = self.equivalents.get(&offset) {
305            return Some(range.equivalent.kind());
306        }
307        return None;
308    }
309
    /// Public entry point for `equivalent_at`: reports the equivalent covering
    /// `offset`, or the undefined range starting there.
    pub fn get_equivalent(&self, offset: AddressValue) -> EquivalentAt<'_> {
        self.equivalent_at(offset)
    }
313
314    fn equivalent_at(&self, offset: AddressValue) -> EquivalentAt<'_> {
315        if let Some(range) = self.equivalents.get(&offset) {
316            return EquivalentAt::Defined {
317                start: offset,
318                range,
319            };
320        }
321        if let Some((&start, range)) = self.equivalents.range(..=offset).next_back() {
322            if offset < range.end {
323                return EquivalentAt::Defined { start, range };
324            }
325        }
326        EquivalentAt::Undefined(self.undefined_range_at(offset))
327    }
328
329    fn undefined_range_at(&self, offset: AddressValue) -> Range<AddressValue> {
330        let after = offset.saturating_add(1);
331        let next_eq = self.equivalents.range(after..).next().map(|(&k, _)| k);
332        let next_lbl = self.labels.range(after..).next().map(|(&k, _)| k);
333        let next_cmt = self.comments.range(after..).next().map(|(&k, _)| k);
334        let end = [Some(self.end()), next_eq, next_lbl, next_cmt]
335            .into_iter()
336            .flatten()
337            .min()
338            .unwrap_or(self.end());
339        (offset..end).into()
340    }
341
342    pub fn set_label(&mut self, offset: AddressValue, label: &str) {
343        self.labels
344            .insert(offset as AddressValue, label.to_string());
345    }
346
347    pub fn clear_label(&mut self, offset: AddressValue) {
348        self.labels.remove(&(offset as AddressValue));
349    }
350
351    pub fn get_label(&self, offset: AddressValue) -> Option<&str> {
352        self.labels
353            .get(&(offset as AddressValue))
354            .map(String::as_str)
355    }
356
357    pub fn set_comment(&mut self, offset: AddressValue, comment: &str) {
358        self.comments
359            .insert(offset as AddressValue, comment.to_string());
360    }
361
362    pub fn clear_comment(&mut self, offset: AddressValue) {
363        self.comments.remove(&(offset as AddressValue));
364    }
365
366    pub fn get_comment(&self, offset: AddressValue) -> Option<&str> {
367        self.comments
368            .get(&(offset as AddressValue))
369            .map(String::as_str)
370    }
371
372    pub fn set_function(&mut self, function: Function) {
373        self.functions
374            .insert(function.addr.offset as AddressValue, function);
375    }
376
377    pub fn get_function(&self, offset: AddressValue) -> Option<&Function> {
378        self.functions.get(&(offset as AddressValue))
379    }
380
381    pub fn clear_function(&mut self, offset: AddressValue) {
382        self.functions.remove(&(offset as AddressValue));
383    }
384
    /// Public wrapper around `read_byte`: the byte stored at `offset`, or
    /// `None` if no byte range supplies one.
    pub fn byte_at(&self, offset: AddressValue) -> Option<u8> {
        self.read_byte(offset)
    }
388
389    pub fn read_u16_le(&self, offset: AddressValue) -> Option<u16> {
390        let low = self.read_byte(offset)?;
391        let high = self.read_byte(offset.saturating_add(1))?;
392        Some((low as u16) | ((high as u16) << 8))
393    }
394
395    pub fn read_u16_be(&self, offset: AddressValue) -> Option<u16> {
396        let high = self.read_byte(offset)?;
397        let low = self.read_byte(offset.saturating_add(1))?;
398        Some((high as u16) | ((low as u16) << 8))
399    }
400
401    pub fn bytes_at(&self, offset: AddressValue, size: AddressValue) -> Vec<u8> {
402        (0..size)
403            .filter_map(|i| self.read_byte(offset + i))
404            .collect()
405    }
406
407    /// Count mapped bytes classified as code, data, or undefined (no equivalent).
408    pub fn space_usage(&self) -> SpaceUsage {
409        let mut usage = SpaceUsage::default();
410        for (&start, range) in &self.equivalents {
411            let span = range.end.saturating_sub(start);
412            match &range.equivalent {
413                Equivalent::Code(_) => usage.code += span,
414                Equivalent::Data(_, _) => usage.data += span,
415            }
416        }
417
418        let mapped: AddressValue = self
419            .byte_ranges
420            .iter()
421            .map(|(&start, range)| range.len().saturating_sub(start))
422            .sum();
423
424        usage.undefined = mapped.saturating_sub(usage.code).saturating_sub(usage.data);
425
426        usage
427    }
428
    /// Walks the region from `start()` to `end()` and produces its listing.
    ///
    /// At each visited address the function, comment and label lines (in that
    /// order) are emitted first, then one line for the content found there: an
    /// instruction, a data item, or a run of raw bytes. `implicit_labels`
    /// supplies fallback label names for `space` where no explicit label exists.
    ///
    /// # Panics
    ///
    /// Panics if a code equivalent no longer decodes at its address.
    pub(crate) fn render(
        &self,
        space: AddressSpace,
        implicit_labels: &ImplicitLabels,
    ) -> Vec<Line> {
        let mut lines = Vec::new();
        let start = self.start();
        let end = self.end();
        if start >= end {
            return lines;
        }

        // Fall back to an empty label set when this space has no implicit labels.
        let default_labels = Labels::default();
        let labels = implicit_labels.get(&space).unwrap_or(&default_labels);

        let mut addr = start;
        // Set whenever output resumes at a new address (initially, and after
        // skipping a gap to the next byte range).
        let mut need_org = true;
        while addr < end {
            if need_org {
                lines.push(Line::Org { addr });
                lines.push(Line::Blank);
                need_org = false;
            }

            if let Some(function) = self.get_function(addr) {
                lines.push(Line::Function {
                    addr,
                    name: function.name.clone(),
                    signature: function.signature.clone(),
                    length: function.length,
                    noreturn: function.noreturn,
                });
            }
            if let Some(comment) = self.get_comment(addr) {
                lines.push(Line::Comment {
                    addr,
                    text: comment.to_string(),
                });
            }
            // An explicit label takes precedence over an implicit one.
            if let Some(label) = self.get_label(addr) {
                lines.push(Line::Label {
                    addr,
                    name: label.to_string(),
                });
            } else if let Some(label) = labels.get(&addr) {
                lines.push(Line::Label {
                    addr,
                    name: label.to_string(),
                });
            }

            match self.get_equivalent(addr) {
                EquivalentAt::Defined { start: _, range } => match &range.equivalent {
                    Equivalent::Code(overrides) => {
                        let insn = self
                            .decode_at(addr)
                            .expect("validated code equivalent must decode");
                        let text = self.format_instruction(addr, &insn, overrides, labels);
                        lines.push(Line::Instruction {
                            addr,
                            direct: insn.direct(),
                            text,
                            bytes: insn.bytes().to_vec(),
                        });
                        // Resume right after this equivalent.
                        addr = range.end;
                    }
                    Equivalent::Data(data_type, size) => {
                        let bytes = self.bytes_at(addr, *size);
                        lines.push(Line::Data {
                            addr,
                            data_type: data_type.clone(),
                            bytes,
                        });
                        addr = range.end;
                    }
                },
                EquivalentAt::Undefined(undefined) => {
                    // Number of consecutive readable bytes before the next annotation.
                    let span = self.raw_run_until_next_annotation(addr, undefined.end, &labels);
                    if span == 0 {
                        // No byte at `addr`: jump to the next byte range if it
                        // starts inside this undefined range, and request a
                        // fresh `Org` line for it.
                        if let Some((&next_mapped, _)) =
                            self.byte_ranges.range(addr.saturating_add(1)..).next()
                        {
                            if next_mapped < undefined.end {
                                addr = next_mapped;
                                need_org = true;
                                continue;
                            }
                        }
                        // Otherwise step one address so annotations at the
                        // following addresses are still visited.
                        addr += 1;
                        continue;
                    }
                    let bytes = self.bytes_at(addr, span);
                    lines.push(Line::Raw { addr, bytes });
                    addr += span;
                }
            }
        }

        lines
    }
529
530    pub(crate) fn to_commands(&self, space: AddressSpace) -> Vec<Command> {
531        let mut commands = Vec::new();
532        for (&offset, range) in &self.byte_ranges {
533            match range {
534                ByteRange::Mapped(file, file_offset, data) => {
535                    commands.push(Command::map_bytes(
536                        space,
537                        offset,
538                        file.clone(),
539                        *file_offset,
540                        data.len() as AddressValue,
541                    ));
542                }
543                ByteRange::Constant(size, value) => {
544                    commands.push(Command::set_constant_bytes(space, offset, *size, *value));
545                }
546            }
547        }
548        for (&offset, equivalent_range) in &self.equivalents {
549            commands.push(Command::set_equivalent(
550                space,
551                offset,
552                equivalent_range.equivalent.clone(),
553            ));
554        }
555        for (&offset, label) in &self.labels {
556            commands.push(Command::set_label(space, offset, label.clone()));
557        }
558        for (&offset, comment) in &self.comments {
559            commands.push(Command::set_comment(space, offset, comment.clone()));
560        }
561        for (&offset, function) in &self.functions {
562            commands.push(Command::set_function(space, offset, function.clone()));
563        }
564        commands
565    }
566
567    pub(crate) fn xrefs_to(&self, space: AddressSpace, target: &PhysicalAddr) -> Vec<Xref> {
568        let mut xrefs = Vec::new();
569        for (&offset, equivalent_range) in &self.equivalents {
570            if !matches!(equivalent_range.equivalent, Equivalent::Code(_)) {
571                continue;
572            }
573            let Some(instruction) = self.decode_at(offset) else {
574                continue;
575            };
576            let source = PhysicalAddr {
577                space,
578                offset: offset as AddressValue,
579            };
580            xrefs.extend(xrefs_to_target(&instruction, source, target));
581        }
582        xrefs
583    }
584
585    pub(crate) fn xrefs_from(&self, source: &PhysicalAddr) -> Vec<Xref> {
586        let offset = source.offset;
587        let Some(equivalent_range) = self.equivalents.get(&offset) else {
588            return Vec::new();
589        };
590        if !matches!(equivalent_range.equivalent, Equivalent::Code(_)) {
591            return Vec::new();
592        }
593        let Some(instruction) = self.decode_at(offset) else {
594            return Vec::new();
595        };
596        xrefs_from_instruction(&instruction, *source)
597    }
598
599    /// Collect all the necessary references for this region.
600    pub(crate) fn collect_refs(&self, space: AddressSpace, refs: &mut LabelCollector) {
601        for (&offset, equivalent_range) in &self.equivalents {
602            if !matches!(equivalent_range.equivalent, Equivalent::Code(_)) {
603                continue;
604            }
605            let Some(instruction) = self.decode_at(offset) else {
606                continue;
607            };
608            xrefs_from_instruction(&instruction, PhysicalAddr { space, offset })
609                .into_iter()
610                .for_each(|xref| {
611                    if self.get_label(xref.to.offset).is_none() {
612                        refs.collect(xref.to.space, xref.to.offset, None);
613                    }
614                });
615        }
616    }
617
618    fn start(&self) -> AddressValue {
619        [
620            self.byte_ranges.keys().copied().next(),
621            self.equivalents.keys().copied().next(),
622            self.labels.keys().copied().next(),
623            self.comments.keys().copied().next(),
624        ]
625        .into_iter()
626        .flatten()
627        .min()
628        .unwrap_or(0) as AddressValue
629    }
630
631    /// Upper bound (exclusive) of mapped bytes and equivalents.
632    fn end(&self) -> AddressValue {
633        [
634            self.byte_ranges
635                .iter()
636                .next_back()
637                .map(|(&start, range)| start + range.len()),
638            self.equivalents
639                .iter()
640                .next_back()
641                .map(|(&start, range)| start + range.end),
642            self.labels.keys().next_back().copied(),
643            self.comments.keys().next_back().copied(),
644            self.functions.keys().next_back().copied(),
645        ]
646        .into_iter()
647        .flatten()
648        .max()
649        .map(|addr| addr + 1)
650        .unwrap_or(0)
651    }
652
653    fn read_byte(&self, offset: AddressValue) -> Option<u8> {
654        let (&start, range) = self.byte_ranges.range(..=offset).next_back()?;
655        match range {
656            ByteRange::Mapped(_, _, data) => data.get((offset - start) as usize).copied(),
657            ByteRange::Constant(size, value) if offset - start < *size => Some(*value),
658            ByteRange::Constant(_, _) => None,
659        }
660    }
661
662    fn decode_at(&self, address: AddressValue) -> Option<Instruction> {
663        let mut available = Vec::with_capacity(Instruction::MAX_LENGTH);
664
665        for i in 0..Instruction::MAX_LENGTH as AddressValue {
666            if let Some(b) = self.read_byte(address + i) {
667                available.push(b);
668            } else {
669                break;
670            }
671        }
672        if available.is_empty() {
673            return None;
674        }
675
676        let ins = Instruction::decode_from_bytes(address as _, &available);
677        if ins.len() > available.len() {
678            return None;
679        }
680
681        Some(ins)
682    }
683
684    pub(crate) fn equivalent_span(
685        &self,
686        offset: AddressValue,
687        equivalent: &Equivalent,
688    ) -> Result<AddressValue, Error> {
689        match equivalent {
690            Equivalent::Code(_) => self
691                .decode_at(offset)
692                .map(|insn| insn.len() as AddressValue)
693                .ok_or(Error::InvalidEquivalent),
694            Equivalent::Data(_, size) => Ok(*size),
695        }
696    }
697
698    fn validate_equivalent_bounds(
699        &self,
700        offset: AddressValue,
701        span: AddressValue,
702    ) -> Result<(), Error> {
703        for i in 0..span {
704            if self.read_byte(offset + i).is_none() {
705                return Err(Error::InvalidAddress(offset + i));
706            }
707        }
708        Ok(())
709    }
710
711    fn validate_no_equivalent_overlap(
712        &self,
713        offset: AddressValue,
714        span: AddressValue,
715    ) -> Result<(), Error> {
716        let end = offset.saturating_add(span);
717        if let Some((&other_start, other)) = self.equivalents.range(..end).next_back() {
718            if other.end > offset {
719                return Err(Error::Overlap(other_start));
720            }
721        }
722        Ok(())
723    }
724
725    fn raw_run_until_next_annotation(
726        &self,
727        addr: AddressValue,
728        limit: AddressValue,
729        implicit_labels: &Labels,
730    ) -> AddressValue {
731        let after = addr.saturating_add(1);
732        let mut boundary = limit;
733        if let Some((&start, _)) = self.equivalents.range(after..).next() {
734            boundary = boundary.min(start);
735        }
736        if let Some((&start, _)) = self.labels.range(after..).next() {
737            boundary = boundary.min(start);
738        }
739        if let Some((&start, _)) = self.comments.range(after..).next() {
740            boundary = boundary.min(start);
741        }
742        if let Some((&start, _)) = implicit_labels.range(after..).next() {
743            boundary = boundary.min(start);
744        }
745
746        let mut end = addr;
747        while end < boundary {
748            if self.read_byte(end).is_none() {
749                break;
750            }
751            end += 1;
752        }
753        end - addr
754    }
755
756    fn format_instruction(
757        &self,
758        _addr: AddressValue,
759        insn: &Instruction,
760        overrides: &[Option<OperandOverride>],
761        implicit_labels: &Labels,
762    ) -> String {
763        let decoded = insn.as_string();
764        let mut merged = overrides.to_vec();
765
766        if let (Some(target), Some(idx)) = (branch_target(insn), branch_target_operand_index(insn))
767        {
768            while merged.len() <= idx {
769                merged.push(None);
770            }
771            if merged[idx].is_none() {
772                let label = self
773                    .get_label(target)
774                    .or_else(|| implicit_labels.get(&target).map(String::as_str));
775                if let Some(label) = label {
776                    merged[idx] = Some(OperandOverride::Label(label.to_string()));
777                }
778            }
779        }
780
781        let text = if merged.iter().all(|o| o.is_none()) {
782            decoded
783        } else {
784            apply_operand_overrides(&decoded, &merged)
785        };
786        sdas_indent_instruction(&text)
787    }
788
789    /// Auto-disassembles code addresses recursively. Will not modify any address that already
790    /// has an equivalent.
791    pub fn auto_disassemble(&mut self, start: u32) -> AutoDisassembleResult {
792        let mut queue = Vec::new();
793        let mut result = AutoDisassembleResult::default();
794        queue.push(start);
795        while let Some(addr) = queue.pop() {
796            match self.get_equivalent_kind(addr) {
797                Some(EquivalentKind::Code) => {
798                    // Exact address means we ran into an existing code block
799                    // successfully
800                    if self.equivalents.contains_key(&addr) {
801                        continue;
802                    }
803                    result
804                        .errors
805                        .push((addr, AutoDisassembleError::Overlapped(EquivalentKind::Code)));
806                }
807                Some(EquivalentKind::Data) => {
808                    result
809                        .errors
810                        .push((addr, AutoDisassembleError::Overlapped(EquivalentKind::Data)));
811                    continue;
812                }
813                None => {}
814            }
815            let Ok(_) = self.set_equivalent(addr, Equivalent::Code(vec![])) else {
816                result
817                    .errors
818                    .push((addr, AutoDisassembleError::Overlapped(EquivalentKind::Code)));
819                continue;
820            };
821            result.success.push(addr);
822            if let Some(ins) = self.decode_at(addr) {
823                let flow = ins.control_flow();
824                match flow {
825                    ControlFlow::Continue(addr) => queue.push(addr),
826                    ControlFlow::Call(next, addr) => {
827                        queue.push(next);
828                        queue.push(addr);
829                    }
830                    ControlFlow::Choice(next, addr) => {
831                        queue.push(next);
832                        queue.push(addr);
833                    }
834                    ControlFlow::Diverge => {
835                        continue;
836                    }
837                }
838            }
839        }
840        result
841    }
842}
843
/// Indents an instruction by four spaces and, when it has operands, pads the
/// mnemonic (the text before the first space) to eight columns.
fn sdas_indent_instruction(text: &str) -> String {
    match text.split_once(' ') {
        Some((mnemonic, operands)) => format!("    {mnemonic:<8}{operands}"),
        None => format!("    {text}"),
    }
}
851
/// Splits decoded instruction text into its mnemonic and a list of trimmed,
/// comma-separated operands. The list is empty when nothing follows the
/// first space.
fn split_instruction(decoded: &str) -> (&str, Vec<&str>) {
    match decoded.split_once(' ') {
        Some((mnemonic, rest)) if !rest.is_empty() => {
            (mnemonic, rest.split(',').map(str::trim).collect())
        }
        Some((mnemonic, _)) => (mnemonic, Vec::new()),
        None => (decoded, Vec::new()),
    }
}
861
862fn apply_operand_overrides(decoded: &str, overrides: &[Option<OperandOverride>]) -> String {
863    if overrides.is_empty() {
864        return decoded.to_string();
865    }
866    let (mnemonic, operands) = split_instruction(decoded);
867    let mut out = mnemonic.to_string();
868    let operand_count = operands.len().max(overrides.len());
869    for idx in 0..operand_count {
870        if idx > 0 {
871            out.push(',');
872        } else {
873            out.push(' ');
874        }
875        match overrides.get(idx).and_then(|o| o.as_ref()) {
876            Some(OperandOverride::Label(label)) => out.push_str(label),
877            Some(OperandOverride::LabelOffset { label, offset }) => {
878                if *offset >= 0 {
879                    out.push_str(&format!("{label}+{offset}"));
880                } else {
881                    out.push_str(&format!("{label}{offset}"));
882                }
883            }
884            Some(OperandOverride::Text(text)) => out.push_str(text),
885            None => {
886                if let Some(default) = operands.get(idx) {
887                    out.push_str(default);
888                }
889            }
890        }
891    }
892    out
893}
894
895/// A and B are inclusive start, exclusive end.
896fn ranges_overlap(
897    a_start: AddressValue,
898    a_end: AddressValue,
899    b_start: AddressValue,
900    b_end: AddressValue,
901) -> bool {
902    a_start < b_end && b_start < a_end
903}
904
905/// A and B are inclusive start, inclusive end.
906fn ranges_overlap_inclusive(
907    a_start: AddressValue,
908    a_end: AddressValue,
909    b_start: AddressValue,
910    b_end: AddressValue,
911) -> bool {
912    a_start <= b_end && b_start <= a_end
913}
914
915#[derive(Debug, Clone, PartialEq, Eq)]
916pub enum AutoDisassembleError {
917    /// Adding an instruction would have overlapped non-code bytes, or partially
918    /// overlapped other code.
919    Overlapped(EquivalentKind),
920}
921
922/// Result of auto-disassembling a region.
923#[must_use]
924#[derive(Debug, Clone, PartialEq, Eq, Default)]
925pub struct AutoDisassembleResult {
926    pub success: Vec<AddressValue>,
927    pub errors: Vec<(AddressValue, AutoDisassembleError)>,
928}
929
930impl AutoDisassembleResult {
931    pub fn is_success(&self) -> bool {
932        self.errors.is_empty()
933    }
934
935    pub fn unwrap_success(self) -> Vec<AddressValue> {
936        if self.errors.is_empty() {
937            self.success
938        } else {
939            if let Some(error) = self.errors.first() {
940                panic!("Auto-disassembly failed (first error at {:04X}))", error.0);
941            }
942            panic!("Auto-disassembly partially failed");
943        }
944    }
945}
946
#[cfg(test)]
mod tests {
    use super::*;
    use crate::address::AddressSpace;
    use crate::db::{DataType, OperandOverride, SpaceUsage};

    /// Marks `addr` as code without operand overrides, panicking if rejected.
    fn mark_code(region: &mut Region, addr: AddressValue) {
        region
            .set_equivalent(addr, Equivalent::Code(vec![]))
            .unwrap();
    }

    #[test]
    fn overlapping_equivalents_are_rejected() {
        const IMAGE: &[u8] = &[0x02, 0x00, 0x10, 0x74, 0x01, 0x00, 0x00, 0x00];
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0, IMAGE);

        mark_code(&mut region, 0);
        // Defining code at 1 must fail once the instruction at 0 exists.
        let code_at_one = region.set_equivalent(1, Equivalent::Code(vec![]));
        assert!(matches!(code_at_one, Err(Error::NotUndefined(1))));

        mark_code(&mut region, 6);
        // Three data bytes starting at 4 would run into the code at 6.
        let data_at_four = region.set_equivalent(4, Equivalent::Data(DataType::Byte, 3));
        assert!(matches!(data_at_four, Err(Error::Overlap(6))));
    }

    #[test]
    fn clear_bytes_splits_straddling_range() {
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0, &[1, 2, 3, 4, 5]);
        region.clear_bytes(1, 2);
        let remaining = region.bytes_at(0, 5);
        assert_eq!(remaining, vec![1, 4, 5]);
    }

    #[test]
    fn decode_at_does_not_require_bytes_at_zero() {
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0x100, &[0x74, 0x42]);
        let insn = region.decode_at(0x100).unwrap();
        assert_eq!(insn.len(), 2);
        assert_eq!(insn.as_string(), "MOV A,#0x42");
    }

    #[test]
    fn decode_at_requires_full_instruction_length() {
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0, &[0x02, 0x00]);
        // Only two bytes are mapped, so decoding at 0 is expected to fail.
        if let Some(insn) = region.decode_at(0) {
            panic!("Expected None, got {:?}", insn.as_string());
        }
    }

    #[test]
    fn branch_target_uses_implicit_label() {
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0, &[0x12, 0x01, 0x6D, 0x02, 0x03, 0x04]);
        mark_code(&mut region, 0);
        mark_code(&mut region, 3);

        let mut collector = LabelCollector::default();
        region.collect_refs(AddressSpace::Code, &mut collector);
        let implicit_labels = collector.into_implicit_labels();

        let lines = region.render(AddressSpace::Code, &implicit_labels);
        // Text of the instruction rendered at address 0.
        let insn = lines
            .iter()
            .find_map(|line| {
                if let Line::Instruction { addr: 0, text, .. } = line {
                    Some(text.clone())
                } else {
                    None
                }
            })
            .unwrap();
        assert!(insn.contains("LCALL"), "{insn}");
        assert!(insn.contains("loc_016D"), "{insn}");
        assert!(!insn.contains("#0x016D"), "{insn}");
    }

    #[test]
    fn operand_override_preserves_other_operands() {
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0, &[0xB5, 0x20, 0x10]);
        region.set_label(0x13, "target");

        // Only the third operand is overridden; the first two keep their
        // decoded text.
        let overrides = vec![
            None,
            None,
            Some(OperandOverride::Label("target".into())),
        ];
        region
            .set_equivalent(0, Equivalent::Code(overrides))
            .unwrap();

        let implicit_labels = ImplicitLabels::default();
        let lines = region.render(AddressSpace::Code, &implicit_labels);
        let insn = lines
            .iter()
            .find_map(|line| {
                if let Line::Instruction { text, .. } = line {
                    Some(text.clone())
                } else {
                    None
                }
            })
            .unwrap();
        assert!(insn.contains("0x20,target"));
    }

    #[test]
    fn render_emits_org_after_unmapped_gap() {
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0, &[1, 2, 3]);
        region.set_bytes("test.bin", 3, 0x10, &[4, 5]);

        let implicit_labels = ImplicitLabels::default();
        let lines = region.render(AddressSpace::Code, &implicit_labels);

        // Collect the address of every `Org` line in render order.
        let mut orgs = Vec::new();
        for line in lines.iter() {
            if let Line::Org { addr } = line {
                orgs.push(*addr);
            }
        }
        assert_eq!(orgs, vec![0, 0x10]);
    }

    #[test]
    fn space_usage_counts_code_data_and_undefined() {
        const IMAGE: &[u8] = &[0x02, 0x00, 0x10, 0x74, 0x01, 0xFF, 0xFF];
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0, IMAGE);
        mark_code(&mut region, 0);
        mark_code(&mut region, 3);
        region
            .set_equivalent(5, Equivalent::Data(DataType::Word, 2))
            .unwrap();

        let fully_defined = SpaceUsage {
            code: 5,
            data: 2,
            undefined: 0,
        };
        assert_eq!(region.space_usage(), fully_defined);

        // Clearing the data equivalent returns its two bytes to undefined.
        region.clear_equivalents(5, 2);
        let data_cleared = SpaceUsage {
            code: 5,
            data: 0,
            undefined: 2,
        };
        assert_eq!(region.space_usage(), data_cleared);
    }
}