Skip to main content

i8051_disassembler/
db.rs

1use std::collections::BTreeMap;
2use std::range::Range;
3
4use serde::{Deserialize, Serialize};
5
6use crate::address::{AREA_ORDER, AddressSpace, AddressValue, PhysicalAddr, Xref};
7use crate::command::{Command, Environment};
8use crate::labels::{ImplicitLabels, LabelCollector};
9pub use crate::region::{ByteRange, Region};
10use crate::render::Line;
11use crate::render::sdas::SdasWriter;
12
13pub struct Db {
14    regions: BTreeMap<AddressSpace, Region>,
15}
16
17impl Db {
18    pub fn new() -> Self {
19        Self {
20            regions: BTreeMap::new(),
21        }
22    }
23
24    pub fn region(&self, space: AddressSpace) -> Option<&Region> {
25        self.regions.get(&space)
26    }
27
28    pub fn region_mut(&mut self, space: AddressSpace) -> &mut Region {
29        self.regions.entry(space).or_insert_with(Region::new)
30    }
31
32    pub fn xrefs_to(&self, target: &PhysicalAddr) -> Vec<Xref> {
33        let mut xrefs = Vec::new();
34        for (&space, region) in &self.regions {
35            xrefs.extend(region.xrefs_to(space, target));
36        }
37        xrefs
38    }
39
40    pub fn xrefs_from(&self, source: &PhysicalAddr) -> Vec<Xref> {
41        let Some(region) = self.regions.get(&source.space) else {
42            return Vec::new();
43        };
44        region.xrefs_from(source)
45    }
46
47    fn implicit_labels(&self) -> ImplicitLabels {
48        let mut label_collector = LabelCollector::default();
49        for (&space, region) in &self.regions {
50            region.collect_refs(space, &mut label_collector);
51        }
52        label_collector.into_implicit_labels()
53    }
54
55    pub fn render(&self, space: AddressSpace) -> Vec<Line> {
56        let implicit_labels = self.implicit_labels();
57
58        self.regions
59            .get(&space)
60            .map(|region| region.render(space, &implicit_labels))
61            .unwrap_or_default()
62    }
63
64    pub fn render_range(
65        &self,
66        space: AddressSpace,
67        start: AddressValue,
68        end: AddressValue,
69    ) -> Vec<Line> {
70        self.render(space)
71            .into_iter()
72            .filter(|line| {
73                let addr = line.addr();
74                addr >= start && addr < end
75            })
76            .collect()
77    }
78
79    pub fn to_sdas(&self) -> String {
80        let mut writer = SdasWriter::default();
81        let implicit_labels = self.implicit_labels();
82
83        for &space in &AREA_ORDER {
84            let Some(region) = self.regions.get(&space) else {
85                continue;
86            };
87            writer.write(space.area_header());
88            for line in region.render(space, &implicit_labels) {
89                writer.write_line(&line);
90            }
91        }
92
93        writer.into_string()
94    }
95
96    pub fn to_commands(&self) -> Vec<Command> {
97        let mut commands = Vec::new();
98        for (&space, region) in &self.regions {
99            commands.extend(region.to_commands(space));
100        }
101        commands
102    }
103
104    pub fn apply(
105        &mut self,
106        command: Command,
107        env: Option<&dyn Environment>,
108    ) -> Result<Vec<Command>, Error> {
109        command.apply(self, env)
110    }
111
112    /// Byte counts for mapped content classified by equivalent kind.
113    pub fn space_usage(&self, space: AddressSpace) -> SpaceUsage {
114        self.regions
115            .get(&space)
116            .map(Region::space_usage)
117            .unwrap_or_default()
118    }
119}
120
121impl Default for Db {
122    fn default() -> Self {
123        Self::new()
124    }
125}
126
127#[derive(Debug, Clone, Serialize, Deserialize)]
128pub struct Function {
129    pub addr: PhysicalAddr,
130    pub name: String,
131    pub signature: Option<String>,
132    pub length: AddressValue,
133    pub noreturn: bool,
134}
135
136#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
137pub enum DataType {
138    Byte,
139    Word,
140    Dword,
141    Qword,
142    Reference(Box<DataType>),
143    Equivalent(Box<DataType>, String),
144    Array(Box<DataType>, usize),
145    String(usize),
146    Struct(Vec<DataType>),
147}
148
149#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
150pub enum OperandOverride {
151    Label(String),
152    LabelOffset { label: String, offset: i32 },
153    Text(String),
154}
155
156#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
157pub enum Equivalent {
158    Code(Vec<Option<OperandOverride>>),
159    Data(DataType, AddressValue),
160}
161
162impl Equivalent {
163    pub fn kind(&self) -> EquivalentKind {
164        match self {
165            Self::Code(_) => EquivalentKind::Code,
166            Self::Data(_, _) => EquivalentKind::Data,
167        }
168    }
169}
170
171#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
172pub enum EquivalentKind {
173    Code,
174    Data,
175}
176
177#[derive(Debug, Clone, Eq, PartialEq)]
178pub struct EquivalentRange {
179    pub end: AddressValue,
180    pub equivalent: Equivalent,
181}
182
183#[derive(Debug, Clone, Eq, PartialEq)]
184pub enum EquivalentAt<'a> {
185    Undefined(Range<AddressValue>),
186    Defined {
187        start: AddressValue,
188        range: &'a EquivalentRange,
189    },
190}
191
192impl<'a> EquivalentAt<'a> {
193    pub fn is_defined(&self) -> bool {
194        matches!(self, Self::Defined { .. })
195    }
196}
197
198#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
199pub struct SpaceUsage {
200    /// Bytes covered by a `Equivalent::Code` range.
201    pub code: AddressValue,
202    /// Bytes covered by a `Equivalent::Data` range.
203    pub data: AddressValue,
204    /// Mapped bytes with no equivalent (rendered as raw data).
205    pub undefined: AddressValue,
206}
207
208impl SpaceUsage {
209    pub fn total(&self) -> AddressValue {
210        self.code
211            .saturating_add(self.data)
212            .saturating_add(self.undefined)
213    }
214}
215
216#[derive(Debug)]
217pub enum Error {
218    NoEnvironment,
219    Overlap(AddressValue),
220    InvalidAddress(AddressValue),
221    InvalidEquivalent,
222    NotUndefined(AddressValue),
223    Io(std::io::Error),
224}
225
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::io;

    use super::*;
    use crate::address::XrefType;
    use crate::command::Command;
    use pretty_assertions::assert_eq;

    // Twelve-byte code image shared by the fixtures below; the trailing
    // comments give the expected disassembly of each instruction.
    static TEST_BINARY: [u8; 12] = [
        0x02, 0x00, 0x10, // LJMP 0x10
        0x74, 0x01, // MOV A, #0x1
        0xF0, // MOVX @DPTR, A
        0x90, 0x00, 0x10, // MOV DPTR, #0x10
        0x93, // MOVC A, @A+DPTR
        0x80, 0xF7, // SJMP 0x3 (rel = 3 - (10 + 2))
    ];

    /// In-memory `Environment` that serves file contents from a map.
    struct TestEnvironment {
        files: HashMap<String, Vec<u8>>,
    }

    impl TestEnvironment {
        /// Environment without any files.
        fn new() -> Self {
            let files = HashMap::new();
            Self { files }
        }

        /// Builder-style registration of `bytes` under `name`.
        fn with_file(self, name: impl Into<String>, bytes: Vec<u8>) -> Self {
            let mut files = self.files;
            files.insert(name.into(), bytes);
            Self { files }
        }
    }

    impl Environment for TestEnvironment {
        /// Returns `size` bytes of `file` starting at `offset`.
        ///
        /// Fails with `NotFound` for an unknown file and with `UnexpectedEof`
        /// when the requested window runs past the end of the data.
        fn load_file_bytes(
            &self,
            file: &str,
            offset: usize,
            size: AddressValue,
        ) -> Result<Vec<u8>, io::Error> {
            let Some(data) = self.files.get(file) else {
                return Err(io::Error::new(
                    io::ErrorKind::NotFound,
                    format!("file not found: {file}"),
                ));
            };
            // `end` never falls below `offset`, so `get` only fails when the
            // window extends beyond the data.
            let end = offset.saturating_add(size as usize);
            match data.get(offset..end) {
                Some(window) => Ok(window.to_vec()),
                None => Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "read past end of file",
                )),
            }
        }
    }

    /// Applies every command in order, panicking on the first failure.
    fn apply_all(db: &mut Db, commands: Vec<Command>, env: &TestEnvironment) {
        commands.into_iter().for_each(|command| {
            db.apply(command, Some(env)).unwrap();
        });
    }

    /// Database with `TEST_BINARY` mapped at code address 0, two labels, one
    /// comment and every instruction start marked as code.
    fn make_test_db() -> Db {
        let mut db = Db::new();

        let code = db.region_mut(AddressSpace::Code);
        code.set_bytes("test.bin", 0, 0, &TEST_BINARY);

        code.set_label(0, "start");
        code.set_equivalent(0, Equivalent::Code(Vec::new()))
            .unwrap();

        code.set_comment(3, "Start of loop");
        code.set_label(3, "loop");
        for addr in [3, 5, 6, 9, 10] {
            code.set_equivalent(addr, Equivalent::Code(Vec::new()))
                .unwrap();
        }
        db
    }

    #[test]
    fn test_db() {
        let db = make_test_db();

        // Address in the code space at the given offset.
        let at = |offset| PhysicalAddr {
            space: AddressSpace::Code,
            offset,
        };
        // The only cross-reference targeting offset 3: the SJMP at offset 10.
        let jump_back = || Xref {
            xref_type: XrefType::Jump,
            from: at(10),
            to: at(3),
        };

        assert_eq!(db.xrefs_to(&at(3)), vec![jump_back()]);

        assert_eq!(db.xrefs_from(&at(10)), vec![jump_back()]);

        let expected = concat!(
            ".area CODE (CODE,ABS)\n",
            ".org 0x0\n",
            "\n",
            "start:\n",
            "    LJMP    loc_0010\n",
            "; Start of loop\n",
            "loop:\n",
            "    MOV     A,#01\n",
            "    MOVX    @DPTR,A\n",
            "    MOV     DPTR,#0x0010\n",
            "    MOVC    A,@A+DPTR\n",
            "    SJMP    loop\n",
            "loc_0010:\n"
        );
        assert_eq!(db.to_sdas(), expected);
    }

    #[test]
    fn test_db_to_commands() {
        let db = make_test_db();
        let commands = db.to_commands();
        let env = TestEnvironment::new().with_file("test.bin", TEST_BINARY.to_vec());
        let mut new_db = Db::new();
        for command in commands {
            // Only `MapBytes` commands are given an environment.
            let maybe_env = if matches!(command, Command::MapBytes { .. }) {
                Some(&env as &dyn Environment)
            } else {
                None
            };
            new_db
                .apply(command, maybe_env)
                .expect("command should apply");
        }
        // Replaying the exported commands must reproduce the same listing.
        assert_eq!(new_db.to_sdas(), db.to_sdas());
    }

    #[test]
    fn test_db_space_usage() {
        let usage = make_test_db().space_usage(AddressSpace::Code);
        let expected = SpaceUsage {
            code: 12,
            data: 0,
            undefined: 0,
        };
        assert_eq!(usage, expected);
    }

    #[test]
    fn map_bytes_command_undo() {
        let env = TestEnvironment::new()
            .with_file("test.bin", vec![1, 2, 3])
            .with_file("other.bin", vec![4, 5]);
        let mut db = Db::new();

        let map_first = Command::map_bytes(AddressSpace::Code, 0, "test.bin", 0, 3);
        db.apply(map_first, Some(&env)).unwrap();

        assert_eq!(
            db.region(AddressSpace::Code).unwrap().bytes_at(0, 3),
            vec![1, 2, 3]
        );

        // Mapping a second file over the first returns the undo commands.
        let map_second = Command::map_bytes(AddressSpace::Code, 0, "other.bin", 0, 2);
        let undo = db.apply(map_second, Some(&env)).unwrap();
        let code = db.region(AddressSpace::Code).unwrap();
        assert_eq!(code.bytes_at(0, 2), vec![4, 5]);

        apply_all(&mut db, undo, &env);
        let code = db.region(AddressSpace::Code).unwrap();
        assert_eq!(code.bytes_at(0, 3), vec![1, 2, 3]);
    }

    #[test]
    fn clear_bytes_command_undo() {
        let env = TestEnvironment::new().with_file("test.bin", vec![1, 2, 3, 4, 5]);
        let mut db = Db::new();

        let map_all = Command::map_bytes(AddressSpace::Code, 0, "test.bin", 0, 5);
        db.apply(map_all, Some(&env)).unwrap();

        let clear_middle = Command::clear_bytes(AddressSpace::Code, 1, 2);
        let undo = db.apply(clear_middle, None).unwrap();
        // After clearing two bytes at offset 1, only three bytes come back.
        let code = db.region(AddressSpace::Code).unwrap();
        assert_eq!(code.bytes_at(0, 5), vec![1, 4, 5]);

        apply_all(&mut db, undo, &env);
        let code = db.region(AddressSpace::Code).unwrap();
        assert_eq!(code.bytes_at(0, 5), vec![1, 2, 3, 4, 5]);
    }

    #[test]
    fn set_constant_bytes_command_undo() {
        let env = TestEnvironment::new().with_file("test.bin", vec![1, 2, 3]);
        let mut db = Db::new();

        let map_all = Command::map_bytes(AddressSpace::Code, 0, "test.bin", 0, 3);
        db.apply(map_all, Some(&env)).unwrap();

        let fill = Command::set_constant_bytes(AddressSpace::Code, 0, 2, 0xFF);
        let undo = db.apply(fill, None).unwrap();
        let code = db.region(AddressSpace::Code).unwrap();
        assert_eq!(code.bytes_at(0, 3), vec![0xFF, 0xFF, 3]);

        apply_all(&mut db, undo, &env);
        let code = db.region(AddressSpace::Code).unwrap();
        assert_eq!(code.bytes_at(0, 3), vec![1, 2, 3]);
    }
}