Skip to main content

i8051_disassembler/
region.rs

1use i8051::{ControlFlow, Instruction};
2use std::collections::BTreeMap;
3use std::ops::RangeBounds;
4use std::range::Range;
5
6use crate::address::{
7    AddressRange, AddressSpace, AddressValue, PhysicalAddr, Xref, branch_target,
8    branch_target_operand_index, xrefs_from_instruction, xrefs_to_target,
9};
10use crate::command::Command;
11use crate::db::{
12    Equivalent, EquivalentAt, EquivalentKind, EquivalentRange, Error, Function, OperandOverride,
13    SpaceUsage,
14};
15use crate::labels::{ImplicitLabels, LabelCollector, Labels};
16use crate::pattern::BytePattern;
17use crate::render::Line;
18
/// One contiguous run of bytes stored in a [`Region`].
///
/// The start address is not part of the value; the region keys each range by
/// its start address.
#[derive(Debug, Clone)]
pub enum ByteRange {
    /// Bytes taken from a file: the file name, the offset inside that file,
    /// and the bytes themselves.
    Mapped(String, usize, Vec<u8>),
    /// A run of identical bytes: the run length followed by the repeated value.
    Constant(AddressValue, u8),
}
24
25impl ByteRange {
26    pub fn len(&self) -> AddressValue {
27        match self {
28            ByteRange::Mapped(_, _, data) => data.len() as AddressValue,
29            ByteRange::Constant(size, _) => *size,
30        }
31    }
32}
33
/// A set of mapped bytes together with the annotations attached to their addresses.
///
/// Every map is keyed by address so that range queries can locate the entry
/// covering, or following, a given offset.
pub struct Region {
    /// Byte storage, keyed by the start address of each range.
    byte_ranges: BTreeMap<AddressValue, ByteRange>,
    /// Code/data classifications, keyed by start address; each value records its end.
    equivalents: BTreeMap<AddressValue, EquivalentRange>,
    /// Explicit label names, keyed by address.
    labels: BTreeMap<AddressValue, String>,
    /// Comment text, keyed by address.
    comments: BTreeMap<AddressValue, String>,
    /// Function records, keyed by the offset of the function's address.
    functions: BTreeMap<AddressValue, Function>,
}
41
42impl Region {
43    pub fn new() -> Self {
44        Self {
45            byte_ranges: BTreeMap::new(),
46            equivalents: BTreeMap::new(),
47            labels: BTreeMap::new(),
48            comments: BTreeMap::new(),
49            functions: BTreeMap::new(),
50        }
51    }
52
53    pub fn set_bytes(
54        &mut self,
55        file: &str,
56        file_offset: usize,
57        offset: AddressValue,
58        bytes: &[u8],
59    ) {
60        self.map_bytes(file, file_offset, offset, bytes);
61    }
62
63    pub fn map_bytes(
64        &mut self,
65        file: &str,
66        file_offset: usize,
67        offset: AddressValue,
68        bytes: &[u8],
69    ) {
70        if !bytes.is_empty() {
71            self.clear_bytes(offset, bytes.len() as AddressValue);
72        }
73        self.byte_ranges.insert(
74            offset,
75            ByteRange::Mapped(file.to_string(), file_offset, bytes.to_vec()),
76        );
77    }
78
79    pub fn set_constant(&mut self, offset: AddressValue, size: AddressValue, value: u8) {
80        if size == 0 {
81            return;
82        }
83        self.clear_bytes(offset, size);
84        self.byte_ranges
85            .insert(offset, ByteRange::Constant(size, value));
86    }
87
88    pub fn find_bytes(&self, pattern: &BytePattern) -> impl Iterator<Item = Range<AddressValue>> {
89        self.find_bytes_in(pattern, ..)
90    }
91
    /// Find bytes in a specific range (`..` searches the whole region).
    ///
    /// Cross-byte-range matches and constant-byte-range matches do not
    /// currently work.
    ///
    /// Each mapped byte range that touches the requested window is searched
    /// independently; matches are returned as address ranges.
    pub fn find_bytes_in(
        &self,
        pattern: &BytePattern,
        range: impl RangeBounds<AddressValue>,
    ) -> impl Iterator<Item = Range<AddressValue>> {
        // NOTE(review): `range.end` is used below as an inclusive bound
        // (`ranges_overlap_inclusive`, `..=data_end`); presumably `AddressRange`
        // stores an inclusive end — confirm against its definition.
        let range = AddressRange::from(range);
        self.byte_ranges
            .iter()
            .map(move |(addr, byte_range)| match byte_range {
                ByteRange::Mapped(_, _, data) => {
                    // NOTE(review): the fourth argument is one past the last mapped
                    // byte, while the helper treats both ends as inclusive.
                    if ranges_overlap_inclusive(
                        range.start,
                        range.end,
                        *addr,
                        *addr + data.len() as AddressValue,
                    ) {
                        // Translate the window into indices relative to this mapping.
                        let data_start = range.start.saturating_sub(*addr) as usize;
                        // NOTE(review): `data.len() - 1` underflows if `data` is empty.
                        let data_end = range
                            .end
                            .saturating_sub(*addr)
                            .min((data.len() - 1) as AddressValue)
                            as usize;
                        Some(
                            pattern
                                .find_all(&data[data_start..=data_end])
                                .map(move |range| {
                                    // Shift slice-relative match positions back to addresses.
                                    Range::from(
                                        range
                                            .start
                                            .saturating_add(data_start)
                                            .saturating_add(*addr as usize)
                                            ..range
                                                .end
                                                .saturating_add(data_start)
                                                .saturating_add(*addr as usize),
                                    )
                                }),
                        )
                    } else {
                        None
                    }
                }
                // Constant runs are never searched.
                ByteRange::Constant(..) => None,
            })
            // First flatten drops the `None`s, the second merges the per-range match iterators.
            .flatten()
            .flatten()
            .map(|range| Range::from(range.start as AddressValue..range.end as AddressValue))
    }
144
145    pub(crate) fn snapshot_byte_ranges(
146        &self,
147        offset: AddressValue,
148        size: AddressValue,
149    ) -> Vec<(AddressValue, ByteRange)> {
150        if size == 0 {
151            return Vec::new();
152        }
153        let end = offset.saturating_add(size);
154        self.byte_ranges
155            .iter()
156            .filter(|(start, range)| range.len() > offset && **start < end)
157            .map(|(start, range)| (*start as AddressValue, range.clone()))
158            .collect()
159    }
160
161    pub fn clear_bytes(&mut self, offset: AddressValue, size: AddressValue) {
162        if size == 0 {
163            return;
164        }
165        let end = offset.saturating_add(size);
166        let mut kept = BTreeMap::new();
167
168        for (&start, range) in &self.byte_ranges {
169            match range {
170                ByteRange::Mapped(file, file_offset, data) => {
171                    let range_end = start.saturating_add(data.len() as AddressValue);
172                    if range_end <= offset || start >= end {
173                        kept.insert(start, range.clone());
174                        continue;
175                    }
176                    if start < offset {
177                        let keep_len = offset - start;
178                        kept.insert(
179                            start,
180                            ByteRange::Mapped(
181                                file.clone(),
182                                *file_offset,
183                                data[..keep_len as usize].to_vec(),
184                            ),
185                        );
186                    }
187                    if range_end > end {
188                        let skip = end.saturating_sub(start);
189                        kept.insert(
190                            end,
191                            ByteRange::Mapped(
192                                file.clone(),
193                                file_offset.saturating_add(skip as usize),
194                                data[skip as _..].to_vec(),
195                            ),
196                        );
197                    }
198                }
199                ByteRange::Constant(count, value) => {
200                    let range_end = start.saturating_add(*count);
201                    if range_end <= offset || start >= end {
202                        kept.insert(start, range.clone());
203                        continue;
204                    }
205                    if start < offset {
206                        kept.insert(start, ByteRange::Constant(offset - start, *value));
207                    }
208                    if range_end > end {
209                        kept.insert(end, ByteRange::Constant(range_end - end, *value));
210                    }
211                }
212            }
213        }
214
215        self.byte_ranges = kept;
216    }
217
218    pub fn set_equivalent(
219        &mut self,
220        offset: AddressValue,
221        equivalent: Equivalent,
222    ) -> Result<&EquivalentRange, Error> {
223        if self.has_equivalent(offset) {
224            return Err(Error::NotUndefined(offset));
225        }
226
227        let span = self.equivalent_span(offset, &equivalent)?;
228        self.validate_equivalent_bounds(offset, span)?;
229        self.validate_no_equivalent_overlap(offset, span)?;
230
231        self.equivalents.insert(
232            offset,
233            EquivalentRange {
234                end: offset.saturating_add(span),
235                equivalent,
236            },
237        );
238        Ok(&self.equivalents[&offset])
239    }
240
241    pub fn clear_equivalents(&mut self, offset: AddressValue, size: AddressValue) {
242        if size == 0 {
243            return;
244        }
245        let end = offset.saturating_add(size);
246        self.equivalents
247            .retain(|&start, range| range.end <= offset || start >= end);
248    }
249
250    pub fn snapshot_equivalents(
251        &self,
252        offset: AddressValue,
253        size: AddressValue,
254    ) -> Vec<(AddressValue, EquivalentRange)> {
255        if size == 0 {
256            return Vec::new();
257        }
258        let end = offset.saturating_add(size);
259        self.equivalents
260            .iter()
261            .filter(|(start, range)| ranges_overlap(**start, range.end, offset, end))
262            .map(|(&start, range)| (start, range.clone()))
263            .collect()
264    }
265
266    pub fn has_equivalent(&self, offset: AddressValue) -> bool {
267        if self.equivalents.contains_key(&offset) {
268            return true;
269        }
270        if let Some((&_, range)) = self.equivalents.range(..=offset).next_back() {
271            return offset < range.end;
272        }
273        return false;
274    }
275
276    pub fn has_equivalent_exact(&self, offset: AddressValue) -> bool {
277        if self.equivalents.contains_key(&offset) {
278            return true;
279        }
280        return false;
281    }
282
283    pub fn get_equivalent_kind(&self, offset: AddressValue) -> Option<EquivalentKind> {
284        if let Some(range) = self.equivalents.get(&offset) {
285            return Some(range.equivalent.kind());
286        }
287        if let Some((&_, range)) = self.equivalents.range(..=offset).next_back() {
288            if offset < range.end {
289                return Some(range.equivalent.kind());
290            }
291        }
292        return None;
293    }
294
295    pub fn get_equivalent_kind_exact(&self, offset: AddressValue) -> Option<EquivalentKind> {
296        if let Some(range) = self.equivalents.get(&offset) {
297            return Some(range.equivalent.kind());
298        }
299        return None;
300    }
301
302    pub fn get_equivalent(&self, offset: AddressValue) -> EquivalentAt<'_> {
303        self.equivalent_at(offset)
304    }
305
306    fn equivalent_at(&self, offset: AddressValue) -> EquivalentAt<'_> {
307        if let Some(range) = self.equivalents.get(&offset) {
308            return EquivalentAt::Defined {
309                start: offset,
310                range,
311            };
312        }
313        if let Some((&start, range)) = self.equivalents.range(..=offset).next_back() {
314            if offset < range.end {
315                return EquivalentAt::Defined { start, range };
316            }
317        }
318        EquivalentAt::Undefined(self.undefined_range_at(offset))
319    }
320
321    fn undefined_range_at(&self, offset: AddressValue) -> Range<AddressValue> {
322        let after = offset.saturating_add(1);
323        let next_eq = self.equivalents.range(after..).next().map(|(&k, _)| k);
324        let next_lbl = self.labels.range(after..).next().map(|(&k, _)| k);
325        let next_cmt = self.comments.range(after..).next().map(|(&k, _)| k);
326        let end = [Some(self.end()), next_eq, next_lbl, next_cmt]
327            .into_iter()
328            .flatten()
329            .min()
330            .unwrap_or(self.end());
331        (offset..end).into()
332    }
333
334    pub fn set_label(&mut self, offset: AddressValue, label: &str) {
335        self.labels
336            .insert(offset as AddressValue, label.to_string());
337    }
338
339    pub fn clear_label(&mut self, offset: AddressValue) {
340        self.labels.remove(&(offset as AddressValue));
341    }
342
343    pub fn get_label(&self, offset: AddressValue) -> Option<&str> {
344        self.labels
345            .get(&(offset as AddressValue))
346            .map(String::as_str)
347    }
348
349    pub fn set_comment(&mut self, offset: AddressValue, comment: &str) {
350        self.comments
351            .insert(offset as AddressValue, comment.to_string());
352    }
353
354    pub fn clear_comment(&mut self, offset: AddressValue) {
355        self.comments.remove(&(offset as AddressValue));
356    }
357
358    pub fn get_comment(&self, offset: AddressValue) -> Option<&str> {
359        self.comments
360            .get(&(offset as AddressValue))
361            .map(String::as_str)
362    }
363
364    pub fn set_function(&mut self, function: Function) {
365        self.functions
366            .insert(function.addr.offset as AddressValue, function);
367    }
368
369    pub fn get_function(&self, offset: AddressValue) -> Option<&Function> {
370        self.functions.get(&(offset as AddressValue))
371    }
372
373    pub fn clear_function(&mut self, offset: AddressValue) {
374        self.functions.remove(&(offset as AddressValue));
375    }
376
377    pub fn byte_at(&self, offset: AddressValue) -> Option<u8> {
378        self.read_byte(offset)
379    }
380
381    pub fn read_u16_le(&self, offset: AddressValue) -> Option<u16> {
382        let low = self.read_byte(offset)?;
383        let high = self.read_byte(offset.saturating_add(1))?;
384        Some((low as u16) | ((high as u16) << 8))
385    }
386
387    pub fn read_u16_be(&self, offset: AddressValue) -> Option<u16> {
388        let high = self.read_byte(offset)?;
389        let low = self.read_byte(offset.saturating_add(1))?;
390        Some((high as u16) | ((low as u16) << 8))
391    }
392
393    pub fn bytes_at(&self, offset: AddressValue, size: AddressValue) -> Vec<u8> {
394        (0..size)
395            .filter_map(|i| self.read_byte(offset + i))
396            .collect()
397    }
398
399    /// Count mapped bytes classified as code, data, or undefined (no equivalent).
400    pub fn space_usage(&self) -> SpaceUsage {
401        let mut usage = SpaceUsage::default();
402        for (&start, range) in &self.equivalents {
403            let span = range.end.saturating_sub(start);
404            match &range.equivalent {
405                Equivalent::Code(_) => usage.code += span,
406                Equivalent::Data(_, _) => usage.data += span,
407            }
408        }
409
410        let mapped: AddressValue = self
411            .byte_ranges
412            .iter()
413            .map(|(&start, range)| range.len().saturating_sub(start))
414            .sum();
415
416        usage.undefined = mapped.saturating_sub(usage.code).saturating_sub(usage.data);
417
418        usage
419    }
420
    /// Walks the region from its first to its last address and produces the
    /// listing lines for it.
    ///
    /// At each address, any function header, comment, and label (explicit
    /// first, otherwise the implicit one for `space`) are emitted, followed by
    /// an instruction, a data item, or a run of raw bytes. An `Org` line plus
    /// a blank line is emitted at the start and again after every jump over
    /// unmapped addresses.
    ///
    /// # Panics
    ///
    /// Panics if a code equivalent can no longer be decoded at its address.
    pub(crate) fn render(
        &self,
        space: AddressSpace,
        implicit_labels: &ImplicitLabels,
    ) -> Vec<Line> {
        let mut lines = Vec::new();
        let start = self.start();
        let end = self.end();
        if start >= end {
            return lines;
        }

        // Fall back to an empty label set when `space` has no implicit labels.
        let default_labels = Labels::default();
        let labels = implicit_labels.get(&space).unwrap_or(&default_labels);

        let mut addr = start;
        let mut need_org = true;
        while addr < end {
            if need_org {
                lines.push(Line::Org { addr });
                lines.push(Line::Blank);
                need_org = false;
            }

            if let Some(function) = self.get_function(addr) {
                lines.push(Line::Function {
                    addr,
                    name: function.name.clone(),
                    signature: function.signature.clone(),
                    length: function.length,
                    noreturn: function.noreturn,
                });
            }
            if let Some(comment) = self.get_comment(addr) {
                lines.push(Line::Comment {
                    addr,
                    text: comment.to_string(),
                });
            }
            // An explicit label wins over an implicit one at the same address.
            if let Some(label) = self.get_label(addr) {
                lines.push(Line::Label {
                    addr,
                    name: label.to_string(),
                });
            } else if let Some(label) = labels.get(&addr) {
                lines.push(Line::Label {
                    addr,
                    name: label.to_string(),
                });
            }

            match self.get_equivalent(addr) {
                EquivalentAt::Defined { start: _, range } => match &range.equivalent {
                    Equivalent::Code(overrides) => {
                        let insn = self
                            .decode_at(addr)
                            .expect("validated code equivalent must decode");
                        let text = self.format_instruction(addr, &insn, overrides, labels);
                        lines.push(Line::Instruction {
                            addr,
                            direct: insn.direct_addr(),
                            text,
                            bytes: insn.bytes().to_vec(),
                        });
                        // Continue right after this equivalent.
                        addr = range.end;
                    }
                    Equivalent::Data(data_type, size) => {
                        let bytes = self.bytes_at(addr, *size);
                        lines.push(Line::Data {
                            addr,
                            data_type: data_type.clone(),
                            bytes,
                        });
                        addr = range.end;
                    }
                },
                EquivalentAt::Undefined(undefined) => {
                    // Length of the readable raw run before the next annotation.
                    let span = self.raw_run_until_next_annotation(addr, undefined.end, &labels);
                    if span == 0 {
                        // Nothing readable here: jump to the next mapped range if it
                        // begins inside this undefined stretch, and restart with an `Org`.
                        if let Some((&next_mapped, _)) =
                            self.byte_ranges.range(addr.saturating_add(1)..).next()
                        {
                            if next_mapped < undefined.end {
                                addr = next_mapped;
                                need_org = true;
                                continue;
                            }
                        }
                        // Otherwise step one address at a time.
                        addr += 1;
                        continue;
                    }
                    let bytes = self.bytes_at(addr, span);
                    lines.push(Line::Raw { addr, bytes });
                    addr += span;
                }
            }
        }

        lines
    }
521
522    pub(crate) fn to_commands(&self, space: AddressSpace) -> Vec<Command> {
523        let mut commands = Vec::new();
524        for (&offset, range) in &self.byte_ranges {
525            match range {
526                ByteRange::Mapped(file, file_offset, data) => {
527                    commands.push(Command::map_bytes(
528                        space,
529                        offset,
530                        file.clone(),
531                        *file_offset,
532                        data.len() as AddressValue,
533                    ));
534                }
535                ByteRange::Constant(size, value) => {
536                    commands.push(Command::set_constant_bytes(space, offset, *size, *value));
537                }
538            }
539        }
540        for (&offset, equivalent_range) in &self.equivalents {
541            commands.push(Command::set_equivalent(
542                space,
543                offset,
544                equivalent_range.equivalent.clone(),
545            ));
546        }
547        for (&offset, label) in &self.labels {
548            commands.push(Command::set_label(space, offset, label.clone()));
549        }
550        for (&offset, comment) in &self.comments {
551            commands.push(Command::set_comment(space, offset, comment.clone()));
552        }
553        for (&offset, function) in &self.functions {
554            commands.push(Command::set_function(space, offset, function.clone()));
555        }
556        commands
557    }
558
559    pub(crate) fn xrefs_to(&self, space: AddressSpace, target: &PhysicalAddr) -> Vec<Xref> {
560        let mut xrefs = Vec::new();
561        for (&offset, equivalent_range) in &self.equivalents {
562            if !matches!(equivalent_range.equivalent, Equivalent::Code(_)) {
563                continue;
564            }
565            let Some(instruction) = self.decode_at(offset) else {
566                continue;
567            };
568            let source = PhysicalAddr {
569                space,
570                offset: offset as AddressValue,
571            };
572            xrefs.extend(xrefs_to_target(&instruction, source, target));
573        }
574        xrefs
575    }
576
577    pub(crate) fn xrefs_from(&self, source: &PhysicalAddr) -> Vec<Xref> {
578        let offset = source.offset;
579        let Some(equivalent_range) = self.equivalents.get(&offset) else {
580            return Vec::new();
581        };
582        if !matches!(equivalent_range.equivalent, Equivalent::Code(_)) {
583            return Vec::new();
584        }
585        let Some(instruction) = self.decode_at(offset) else {
586            return Vec::new();
587        };
588        xrefs_from_instruction(&instruction, *source)
589    }
590
591    /// Collect all the necessary references for this region.
592    pub(crate) fn collect_refs(&self, space: AddressSpace, refs: &mut LabelCollector) {
593        for (&offset, equivalent_range) in &self.equivalents {
594            if !matches!(equivalent_range.equivalent, Equivalent::Code(_)) {
595                continue;
596            }
597            let Some(instruction) = self.decode_at(offset) else {
598                continue;
599            };
600            xrefs_from_instruction(&instruction, PhysicalAddr { space, offset })
601                .into_iter()
602                .for_each(|xref| {
603                    if self.get_label(xref.to.offset).is_none() {
604                        refs.collect(xref.to.space, xref.to.offset, None);
605                    }
606                });
607        }
608    }
609
610    fn start(&self) -> AddressValue {
611        [
612            self.byte_ranges.keys().copied().next(),
613            self.equivalents.keys().copied().next(),
614            self.labels.keys().copied().next(),
615            self.comments.keys().copied().next(),
616        ]
617        .into_iter()
618        .flatten()
619        .min()
620        .unwrap_or(0) as AddressValue
621    }
622
623    /// Upper bound (exclusive) of mapped bytes and equivalents.
624    fn end(&self) -> AddressValue {
625        [
626            self.byte_ranges
627                .iter()
628                .next_back()
629                .map(|(&start, range)| start + range.len()),
630            self.equivalents
631                .iter()
632                .next_back()
633                .map(|(&start, range)| start + range.end),
634            self.labels.keys().next_back().copied(),
635            self.comments.keys().next_back().copied(),
636            self.functions.keys().next_back().copied(),
637        ]
638        .into_iter()
639        .flatten()
640        .max()
641        .map(|addr| addr + 1)
642        .unwrap_or(0)
643    }
644
645    fn read_byte(&self, offset: AddressValue) -> Option<u8> {
646        let (&start, range) = self.byte_ranges.range(..=offset).next_back()?;
647        match range {
648            ByteRange::Mapped(_, _, data) => data.get((offset - start) as usize).copied(),
649            ByteRange::Constant(size, value) if offset - start < *size => Some(*value),
650            ByteRange::Constant(_, _) => None,
651        }
652    }
653
654    fn decode_at(&self, address: AddressValue) -> Option<Instruction> {
655        let mut available = Vec::with_capacity(Instruction::MAX_LENGTH);
656
657        for i in 0..Instruction::MAX_LENGTH as AddressValue {
658            if let Some(b) = self.read_byte(address + i) {
659                available.push(b);
660            } else {
661                break;
662            }
663        }
664        if available.is_empty() {
665            return None;
666        }
667
668        let ins = Instruction::decode_from_bytes(address as _, &available);
669        if ins.len() > available.len() {
670            return None;
671        }
672
673        Some(ins)
674    }
675
676    pub(crate) fn equivalent_span(
677        &self,
678        offset: AddressValue,
679        equivalent: &Equivalent,
680    ) -> Result<AddressValue, Error> {
681        match equivalent {
682            Equivalent::Code(_) => self
683                .decode_at(offset)
684                .map(|insn| insn.len() as AddressValue)
685                .ok_or(Error::InvalidEquivalent),
686            Equivalent::Data(_, size) => Ok(*size),
687        }
688    }
689
690    fn validate_equivalent_bounds(
691        &self,
692        offset: AddressValue,
693        span: AddressValue,
694    ) -> Result<(), Error> {
695        for i in 0..span {
696            if self.read_byte(offset + i).is_none() {
697                return Err(Error::InvalidAddress(offset + i));
698            }
699        }
700        Ok(())
701    }
702
703    fn validate_no_equivalent_overlap(
704        &self,
705        offset: AddressValue,
706        span: AddressValue,
707    ) -> Result<(), Error> {
708        let end = offset.saturating_add(span);
709        if let Some((&other_start, other)) = self.equivalents.range(..end).next_back() {
710            if other.end > offset {
711                return Err(Error::Overlap(other_start));
712            }
713        }
714        Ok(())
715    }
716
717    fn raw_run_until_next_annotation(
718        &self,
719        addr: AddressValue,
720        limit: AddressValue,
721        implicit_labels: &Labels,
722    ) -> AddressValue {
723        let after = addr.saturating_add(1);
724        let mut boundary = limit;
725        if let Some((&start, _)) = self.equivalents.range(after..).next() {
726            boundary = boundary.min(start);
727        }
728        if let Some((&start, _)) = self.labels.range(after..).next() {
729            boundary = boundary.min(start);
730        }
731        if let Some((&start, _)) = self.comments.range(after..).next() {
732            boundary = boundary.min(start);
733        }
734        if let Some((&start, _)) = implicit_labels.range(after..).next() {
735            boundary = boundary.min(start);
736        }
737
738        let mut end = addr;
739        while end < boundary {
740            if self.read_byte(end).is_none() {
741                break;
742            }
743            end += 1;
744        }
745        end - addr
746    }
747
748    fn format_instruction(
749        &self,
750        _addr: AddressValue,
751        insn: &Instruction,
752        overrides: &[Option<OperandOverride>],
753        implicit_labels: &Labels,
754    ) -> String {
755        let decoded = insn.as_string();
756        let mut merged = overrides.to_vec();
757
758        if let (Some(target), Some(idx)) = (branch_target(insn), branch_target_operand_index(insn))
759        {
760            while merged.len() <= idx {
761                merged.push(None);
762            }
763            if merged[idx].is_none() {
764                let label = self
765                    .get_label(target)
766                    .or_else(|| implicit_labels.get(&target).map(String::as_str));
767                if let Some(label) = label {
768                    merged[idx] = Some(OperandOverride::Label(label.to_string()));
769                }
770            }
771        }
772
773        let text = if merged.iter().all(|o| o.is_none()) {
774            decoded.to_string()
775        } else {
776            apply_operand_overrides(&decoded, &merged)
777        };
778        sdas_indent_instruction(&text)
779    }
780
781    /// Auto-disassembles code addresses recursively. Will not modify any address that already
782    /// has an equivalent.
783    pub fn auto_disassemble(&mut self, start: u32) -> AutoDisassembleResult {
784        let mut queue = Vec::new();
785        let mut result = AutoDisassembleResult::default();
786        queue.push(start);
787        while let Some(addr) = queue.pop() {
788            match self.get_equivalent_kind(addr) {
789                Some(EquivalentKind::Code) => {
790                    // Exact address means we ran into an existing code block
791                    // successfully
792                    if self.equivalents.contains_key(&addr) {
793                        continue;
794                    }
795                    result
796                        .errors
797                        .push((addr, AutoDisassembleError::Overlapped(EquivalentKind::Code)));
798                }
799                Some(EquivalentKind::Data) => {
800                    result
801                        .errors
802                        .push((addr, AutoDisassembleError::Overlapped(EquivalentKind::Data)));
803                    continue;
804                }
805                None => {}
806            }
807            let Ok(_) = self.set_equivalent(addr, Equivalent::Code(vec![])) else {
808                result
809                    .errors
810                    .push((addr, AutoDisassembleError::Overlapped(EquivalentKind::Code)));
811                continue;
812            };
813            result.success.push(addr);
814            if let Some(ins) = self.decode_at(addr) {
815                let flow = ins.control_flow();
816                match flow {
817                    ControlFlow::Continue { next } => queue.push(next),
818                    ControlFlow::Jump { target } => queue.push(target),
819                    ControlFlow::Call { target, return_pc } => {
820                        queue.push(return_pc);
821                        queue.push(target);
822                    }
823                    ControlFlow::Choice {
824                        fall_through,
825                        branch_target,
826                    } => {
827                        queue.push(fall_through);
828                        queue.push(branch_target);
829                    }
830                    ControlFlow::Diverge => {
831                        continue;
832                    }
833                }
834            }
835        }
836        result
837    }
838}
839
/// Indents an instruction by four spaces and, when it has operands, pads the
/// mnemonic to eight columns so the operands line up.
fn sdas_indent_instruction(text: &str) -> String {
    match text.split_once(' ') {
        Some((mnemonic, operands)) => format!("    {mnemonic:<8}{operands}"),
        None => format!("    {text}"),
    }
}
847
/// Splits decoded instruction text into its mnemonic and its comma-separated,
/// trimmed operands. Text without operands yields an empty operand list.
fn split_instruction(decoded: &str) -> (&str, Vec<&str>) {
    match decoded.split_once(' ') {
        Some((mnemonic, rest)) if !rest.is_empty() => {
            let operands = rest.split(',').map(|operand| operand.trim()).collect();
            (mnemonic, operands)
        }
        Some((mnemonic, _)) => (mnemonic, Vec::new()),
        None => (decoded, Vec::new()),
    }
}
857
858fn apply_operand_overrides(decoded: &str, overrides: &[Option<OperandOverride>]) -> String {
859    if overrides.is_empty() {
860        return decoded.to_string();
861    }
862    let (mnemonic, operands) = split_instruction(decoded);
863    let mut out = mnemonic.to_string();
864    let operand_count = operands.len().max(overrides.len());
865    for idx in 0..operand_count {
866        if idx > 0 {
867            out.push(',');
868        } else {
869            out.push(' ');
870        }
871        match overrides.get(idx).and_then(|o| o.as_ref()) {
872            Some(OperandOverride::Label(label)) => out.push_str(label),
873            Some(OperandOverride::LabelOffset { label, offset }) => {
874                if *offset >= 0 {
875                    out.push_str(&format!("{label}+{offset}"));
876                } else {
877                    out.push_str(&format!("{label}{offset}"));
878                }
879            }
880            Some(OperandOverride::Text(text)) => out.push_str(text),
881            None => {
882                if let Some(default) = operands.get(idx) {
883                    out.push_str(default);
884                }
885            }
886        }
887    }
888    out
889}
890
891/// A and B are inclusive start, exclusive end.
892fn ranges_overlap(
893    a_start: AddressValue,
894    a_end: AddressValue,
895    b_start: AddressValue,
896    b_end: AddressValue,
897) -> bool {
898    a_start < b_end && b_start < a_end
899}
900
901/// A and B are inclusive start, inclusive end.
902fn ranges_overlap_inclusive(
903    a_start: AddressValue,
904    a_end: AddressValue,
905    b_start: AddressValue,
906    b_end: AddressValue,
907) -> bool {
908    a_start <= b_end && b_start <= a_end
909}
910
911#[derive(Debug, Clone, PartialEq, Eq)]
912pub enum AutoDisassembleError {
913    /// Adding an instruction would have overlapped non-code bytes, or partially
914    /// overlapped other code.
915    Overlapped(EquivalentKind),
916}
917
918/// Result of auto-disassembling a region.
919#[must_use]
920#[derive(Debug, Clone, PartialEq, Eq, Default)]
921pub struct AutoDisassembleResult {
922    pub success: Vec<AddressValue>,
923    pub errors: Vec<(AddressValue, AutoDisassembleError)>,
924}
925
926impl AutoDisassembleResult {
927    pub fn is_success(&self) -> bool {
928        self.errors.is_empty()
929    }
930
931    pub fn unwrap_success(self) -> Vec<AddressValue> {
932        if self.errors.is_empty() {
933            self.success
934        } else {
935            if let Some(error) = self.errors.first() {
936                panic!("Auto-disassembly failed (first error at {:04X}))", error.0);
937            }
938            panic!("Auto-disassembly partially failed");
939        }
940    }
941}
942
#[cfg(test)]
mod tests {
    use super::*;
    use crate::address::AddressSpace;
    use crate::db::{DataType, OperandOverride, SpaceUsage};

    /// A code equivalent that carries no operand overrides.
    fn plain_code() -> Equivalent {
        Equivalent::Code(vec![])
    }

    /// Defining an equivalent on top of, or running into, an existing one fails.
    #[test]
    fn overlapping_equivalents_are_rejected() {
        let image: [u8; 8] = [0x02, 0x00, 0x10, 0x74, 0x01, 0x00, 0x00, 0x00];
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0, &image);

        region.set_equivalent(0, plain_code()).unwrap();
        let inside_existing = region.set_equivalent(1, plain_code());
        assert!(matches!(inside_existing, Err(Error::NotUndefined(1))));

        region.set_equivalent(6, plain_code()).unwrap();
        let into_existing = region.set_equivalent(4, Equivalent::Data(DataType::Byte, 3));
        assert!(matches!(into_existing, Err(Error::Overlap(6))));
    }

    /// Clearing the middle of a mapped range leaves the bytes on both sides.
    #[test]
    fn clear_bytes_splits_straddling_range() {
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0, &[1, 2, 3, 4, 5]);
        region.clear_bytes(1, 2);

        let remaining = region.bytes_at(0, 5);
        assert_eq!(remaining, vec![1, 4, 5]);
    }

    /// Decoding works for bytes mapped at a non-zero address.
    #[test]
    fn decode_at_does_not_require_bytes_at_zero() {
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0x100, &[0x74, 0x42]);

        let decoded = region.decode_at(0x100).unwrap();
        assert_eq!(decoded.len(), 2);
        assert_eq!(decoded.as_string(), "MOV A,#0x42");
    }

    /// A truncated instruction must not decode.
    #[test]
    fn decode_at_requires_full_instruction_length() {
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0, &[0x02, 0x00]);

        let decoded = region.decode_at(0);
        assert!(
            decoded.is_none(),
            "Expected None, got {:?}",
            decoded.unwrap().as_string()
        );
    }

    /// A call target is rendered through its implicit label, not as a literal.
    #[test]
    fn branch_target_uses_implicit_label() {
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0, &[0x12, 0x01, 0x6D, 0x02, 0x03, 0x04]);
        for addr in [0, 3] {
            region.set_equivalent(addr, plain_code()).unwrap();
        }

        let mut collector = LabelCollector::default();
        region.collect_refs(AddressSpace::Code, &mut collector);
        let implicit_labels = collector.into_implicit_labels();

        let rendered = region.render(AddressSpace::Code, &implicit_labels);
        let call_text = rendered
            .iter()
            .find_map(|line| {
                if let Line::Instruction { addr: 0, text, .. } = line {
                    Some(text.clone())
                } else {
                    None
                }
            })
            .unwrap();
        assert!(call_text.contains("LCALL"), "{call_text}");
        assert!(call_text.contains("loc_016D"), "{call_text}");
        assert!(!call_text.contains("#0x016D"), "{call_text}");
    }

    /// Overriding one operand leaves the other decoded operands in place.
    #[test]
    fn operand_override_preserves_other_operands() {
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0, &[0xB5, 0x20, 0x10]);
        region.set_label(0x13, "target");

        let overrides = vec![
            None,
            None,
            Some(OperandOverride::Label("target".into())),
        ];
        region
            .set_equivalent(0, Equivalent::Code(overrides))
            .unwrap();

        let implicit_labels = ImplicitLabels::default();
        let rendered = region.render(AddressSpace::Code, &implicit_labels);
        let first_instruction = rendered
            .iter()
            .find_map(|line| {
                if let Line::Instruction { text, .. } = line {
                    Some(text.clone())
                } else {
                    None
                }
            })
            .unwrap();
        assert!(first_instruction.contains("0x20,target"));
    }

    /// Each separately mapped run of bytes gets its own origin line.
    #[test]
    fn render_emits_org_after_unmapped_gap() {
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0, &[1, 2, 3]);
        region.set_bytes("test.bin", 3, 0x10, &[4, 5]);

        let implicit_labels = ImplicitLabels::default();
        let rendered = region.render(AddressSpace::Code, &implicit_labels);
        let orgs: Vec<_> = rendered
            .iter()
            .filter_map(|line| {
                if let Line::Org { addr } = line {
                    Some(*addr)
                } else {
                    None
                }
            })
            .collect();
        assert_eq!(orgs, vec![0, 0x10]);
    }

    /// Usage totals follow equivalents as they are defined and cleared.
    #[test]
    fn space_usage_counts_code_data_and_undefined() {
        let image: [u8; 7] = [0x02, 0x00, 0x10, 0x74, 0x01, 0xFF, 0xFF];
        let mut region = Region::new();
        region.set_bytes("test.bin", 0, 0, &image);
        region.set_equivalent(0, plain_code()).unwrap();
        region.set_equivalent(3, plain_code()).unwrap();
        region
            .set_equivalent(5, Equivalent::Data(DataType::Word, 2))
            .unwrap();

        let with_data = SpaceUsage {
            code: 5,
            data: 2,
            undefined: 0,
        };
        assert_eq!(region.space_usage(), with_data);

        region.clear_equivalents(5, 2);
        let after_clear = SpaceUsage {
            code: 5,
            data: 0,
            undefined: 2,
        };
        assert_eq!(region.space_usage(), after_clear);
    }
}