Skip to main content

i8051_disassembler/
db.rs

1use std::collections::BTreeMap;
2use std::range::Range;
3
4use serde::{Deserialize, Serialize};
5
6use crate::address::{AREA_ORDER, AddressSpace, AddressValue, PhysicalAddr, Xref};
7use crate::command::{Command, Environment};
8use crate::labels::{ImplicitLabels, LabelCollector};
9pub use crate::region::{ByteRange, Region};
10
/// Disassembly database: holds at most one [`Region`] per [`AddressSpace`].
///
/// Regions are kept in a `BTreeMap`, so every operation that walks all
/// regions visits them in `AddressSpace` key order.
pub struct Db {
    /// Region stored for each address space that has been touched so far.
    regions: BTreeMap<AddressSpace, Region>,
}
14
15impl Db {
16    pub fn new() -> Self {
17        Self {
18            regions: BTreeMap::new(),
19        }
20    }
21
22    pub fn region(&self, space: AddressSpace) -> Option<&Region> {
23        self.regions.get(&space)
24    }
25
26    pub fn region_mut(&mut self, space: AddressSpace) -> &mut Region {
27        self.regions.entry(space).or_insert_with(Region::new)
28    }
29
30    pub fn xrefs_to(&self, target: &PhysicalAddr) -> Vec<Xref> {
31        let mut xrefs = Vec::new();
32        for (&space, region) in &self.regions {
33            xrefs.extend(region.xrefs_to(space, target));
34        }
35        xrefs
36    }
37
38    pub fn xrefs_from(&self, source: &PhysicalAddr) -> Vec<Xref> {
39        let Some(region) = self.regions.get(&source.space) else {
40            return Vec::new();
41        };
42        region.xrefs_from(source)
43    }
44
45    fn implicit_labels(&self) -> ImplicitLabels {
46        let mut label_collector = LabelCollector::default();
47        for (&space, region) in &self.regions {
48            region.collect_refs(space, &mut label_collector);
49        }
50        label_collector.into_implicit_labels()
51    }
52
53    pub fn render(&self, space: AddressSpace) -> Vec<Line> {
54        let implicit_labels = self.implicit_labels();
55
56        self.regions
57            .get(&space)
58            .map(|region| region.render(space, &implicit_labels))
59            .unwrap_or_default()
60    }
61
62    pub fn render_range(
63        &self,
64        space: AddressSpace,
65        start: AddressValue,
66        end: AddressValue,
67    ) -> Vec<Line> {
68        self.render(space)
69            .into_iter()
70            .filter(|line| {
71                let addr = line.addr();
72                addr >= start && addr < end
73            })
74            .collect()
75    }
76
77    pub fn to_sdas(&self) -> String {
78        let mut s = String::new();
79        let implicit_labels = self.implicit_labels();
80
81        for &space in &AREA_ORDER {
82            let Some(region) = self.regions.get(&space) else {
83                continue;
84            };
85            s.push_str(space.area_header());
86            for line in region.render(space, &implicit_labels) {
87                s.push_str(&line.to_sdas());
88            }
89        }
90        s
91    }
92
93    pub fn to_commands(&self) -> Vec<Command> {
94        let mut commands = Vec::new();
95        for (&space, region) in &self.regions {
96            commands.extend(region.to_commands(space));
97        }
98        commands
99    }
100
101    pub fn apply(
102        &mut self,
103        command: Command,
104        env: Option<&dyn Environment>,
105    ) -> Result<Vec<Command>, Error> {
106        command.apply(self, env)
107    }
108}
109
110impl Default for Db {
111    fn default() -> Self {
112        Self::new()
113    }
114}
115
/// One line of a rendered listing, as returned by `Db::render`.
///
/// Every variant except `Blank` carries the address it belongs to; see
/// `Line::addr`.
#[derive(Debug, Clone)]
pub enum Line {
    /// Origin directive; emitted by `to_sdas` as `.org 0x<addr>`.
    Org {
        addr: AddressValue,
    },
    /// Empty separator line. Carries no address (`addr()` reports `0`).
    Blank,
    /// Comment text; emitted by `to_sdas` behind a `; ` prefix.
    Comment {
        addr: AddressValue,
        text: String,
    },
    /// Label definition; emitted by `to_sdas` as `_<name>:`.
    Label {
        addr: AddressValue,
        name: String,
    },
    /// Instruction line: `text` is emitted verbatim by `to_sdas`.
    Instruction {
        addr: AddressValue,
        text: String,
        // presumably the encoded bytes of the instruction — confirm with the renderer
        bytes: Vec<u8>,
    },
    /// Typed data; `to_sdas` emits `bytes` as a single `.db` row and does not
    /// look at `data_type`.
    Data {
        addr: AddressValue,
        data_type: DataType,
        bytes: Vec<u8>,
    },
    /// Bytes with no interpretation; `to_sdas` emits them as `.db` rows of up
    /// to eight bytes under an "Unknown bytes" comment.
    Raw {
        addr: AddressValue,
        bytes: Vec<u8>,
    },
    /// Function header line.
    Function {
        addr: AddressValue,
        name: String,
        /// Signature text appended to the name; `to_sdas` uses `()` when absent.
        signature: Option<String>,
        /// Printed as `len = 0x…` by `to_sdas` unless `noreturn` is set.
        length: AddressValue,
        noreturn: bool,
    },
}
152
153impl Line {
154    pub fn addr(&self) -> AddressValue {
155        match self {
156            Self::Org { addr, .. }
157            | Self::Comment { addr, .. }
158            | Self::Label { addr, .. }
159            | Self::Function { addr, .. }
160            | Self::Instruction { addr, .. }
161            | Self::Data { addr, .. }
162            | Self::Raw { addr, .. } => *addr,
163            Self::Blank => 0,
164        }
165    }
166
167    pub fn to_sdas(&self) -> String {
168        match self {
169            Self::Org { addr } => format!(".org 0x{addr:X}\n"),
170            Self::Blank => "\n".to_string(),
171            Self::Comment { text, .. } => format!("; {text}\n"),
172            Self::Label { name, .. } => format!("_{name}:\n"),
173            Self::Instruction { text, .. } => format!("{text}\n"),
174            Self::Data { bytes, .. } => {
175                let db = bytes
176                    .iter()
177                    .map(|b| format!("0x{b:02X}"))
178                    .collect::<Vec<_>>()
179                    .join(", ");
180                format!("    .db {db}\n")
181            }
182            Self::Raw { addr, bytes } => {
183                let mut out = String::new();
184                for (i, chunk) in bytes.chunks(8).enumerate() {
185                    let chunk_addr = addr.saturating_add((i * 8) as AddressValue);
186                    let db = chunk
187                        .iter()
188                        .map(|b| format!("0x{b:02X}"))
189                        .collect::<Vec<_>>()
190                        .join(", ");
191                    if i == 0 {
192                        out.push_str(&format!(
193                            "; Unknown bytes at 0x{chunk_addr:04X}\n    .db {db}\n"
194                        ));
195                    } else {
196                        out.push_str(&format!("    .db {db}\n"));
197                    }
198                }
199                out
200            }
201            Self::Function {
202                addr,
203                name,
204                signature,
205                length,
206                noreturn,
207            } => {
208                let sig = signature.as_deref().unwrap_or("()");
209                let mut line = format!("0x{addr:05X}: fn {name}{sig}");
210                if *noreturn {
211                    line.push_str("; noreturn");
212                } else {
213                    line.push_str(&format!("; len = 0x{length:X}"));
214                }
215                format!("{line}\n")
216            }
217        }
218    }
219}
220
/// Serializable record describing a function.
///
/// Mirrors the fields of `Line::Function`, but identifies the location with a
/// full [`PhysicalAddr`] instead of a bare address value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Function {
    /// Where the function is located.
    pub addr: PhysicalAddr,
    pub name: String,
    /// Optional signature text.
    pub signature: Option<String>,
    // presumably the extent of the function in address units — TODO confirm
    pub length: AddressValue,
    pub noreturn: bool,
}
229
/// Describes how a run of bytes is interpreted as data.
///
/// The type is recursive: several variants wrap another `DataType`.
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub enum DataType {
    Byte,
    // NOTE(review): Word/Dword/Qword are presumably 2/4/8 bytes — the widths
    // are not defined in this file; confirm where they are consumed.
    Word,
    Dword,
    Qword,
    // presumably a value of the inner type that refers to another address — confirm
    Reference(Box<DataType>),
    // NOTE(review): meaning of the attached string is not visible here — confirm
    Equivalent(Box<DataType>, String),
    /// An element type together with a `usize` (presumably the element count).
    Array(Box<DataType>, usize),
    /// A string with an attached `usize` (presumably its length).
    String(usize),
    /// A sequence of member types.
    Struct(Vec<DataType>),
}
242
/// Replacement for an operand, used inside `Equivalent::Code`.
// NOTE(review): how each variant is rendered is decided outside this file.
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub enum OperandOverride {
    /// A label name.
    Label(String),
    /// A label name plus a signed offset.
    LabelOffset { label: String, offset: i32 },
    /// Free-form text.
    Text(String),
}
249
/// Interpretation assigned to bytes in a region: code or data.
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub enum Equivalent {
    /// Code, with a list of optional operand overrides (presumably one slot
    /// per operand — confirm). The tests in this file pass an empty list.
    Code(Vec<Option<OperandOverride>>),
    /// Data of the given type, with an attached address value.
    // NOTE(review): the meaning of the `AddressValue` is not visible here — confirm
    Data(DataType, AddressValue),
}
255
/// An [`Equivalent`] together with the address where its range ends.
///
/// The start address is not stored here; `EquivalentAt::Defined` carries it
/// alongside a reference to this struct.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct EquivalentRange {
    // NOTE(review): whether `end` is inclusive or exclusive is not visible here — confirm
    pub end: AddressValue,
    pub equivalent: Equivalent,
}
261
/// Result of looking up what is defined at an address: either an undefined
/// address range, or a borrowed [`EquivalentRange`] with its start address.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum EquivalentAt<'a> {
    /// Nothing is defined; carries the undefined address range.
    Undefined(Range<AddressValue>),
    /// Something is defined here.
    Defined {
        /// Address where the defined range starts.
        start: AddressValue,
        /// The definition itself, borrowed from its owner.
        range: &'a EquivalentRange,
    },
}
270
271impl<'a> EquivalentAt<'a> {
272    pub fn is_defined(&self) -> bool {
273        matches!(self, Self::Defined { .. })
274    }
275}
276
277#[derive(Debug)]
278pub enum Error {
279    NoEnvironment,
280    Overlap(AddressValue),
281    InvalidAddress(AddressValue),
282    InvalidEquivalent,
283    NotUndefined(AddressValue),
284    Io(std::io::Error),
285}
286
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::io;

    use super::*;
    use crate::address::XrefType;
    use crate::command::Command;
    use pretty_assertions::assert_eq;

    /// Small 8051 program shared by the tests below.
    static TEST_BINARY: [u8; 12] = [
        0x02, 0x00, 0x10, // LJMP 0x10
        0x74, 0x01, // MOV A, #0x1
        0xF0, // MOVX @DPTR, A
        0x90, 0x00, 0x10, // MOV DPTR, #0x10
        0x93, // MOVC A, @A+DPTR
        0x80, 0xF7, // SJMP 0x3 (rel = 3 - (10 + 2))
    ];

    /// In-memory [`Environment`] that serves file contents from a map.
    struct TestEnvironment {
        files: HashMap<String, Vec<u8>>,
    }

    impl TestEnvironment {
        /// Starts with no files registered.
        fn new() -> Self {
            let files = HashMap::new();
            Self { files }
        }

        /// Registers `bytes` under `name` and hands the environment back.
        fn with_file(mut self, name: impl Into<String>, bytes: Vec<u8>) -> Self {
            self.files.insert(name.into(), bytes);
            self
        }
    }

    impl Environment for TestEnvironment {
        /// Returns `size` bytes of `file` starting at `offset`; fails with
        /// `NotFound` for unknown files and `UnexpectedEof` for reads that
        /// run past the end of the stored bytes.
        fn load_file_bytes(
            &self,
            file: &str,
            offset: usize,
            size: AddressValue,
        ) -> Result<Vec<u8>, io::Error> {
            let Some(data) = self.files.get(file) else {
                return Err(io::Error::new(
                    io::ErrorKind::NotFound,
                    format!("file not found: {file}"),
                ));
            };
            let end = offset.saturating_add(size as usize);
            match data.get(offset..end) {
                Some(window) => Ok(window.to_vec()),
                None => Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "read past end of file",
                )),
            }
        }
    }

    /// Applies every command in order, panicking on the first failure.
    fn apply_all(db: &mut Db, commands: Vec<Command>, env: &TestEnvironment) {
        commands.into_iter().for_each(|command| {
            db.apply(command, Some(env)).unwrap();
        });
    }

    /// Builds a database whose code region holds `TEST_BINARY`, marked up
    /// with two labels, one comment and six code definitions.
    fn make_test_db() -> Db {
        let mut db = Db::new();

        let code = db.region_mut(AddressSpace::Code);
        code.set_bytes("test.bin", 0, 0, &TEST_BINARY);

        code.set_label(0, "start");
        code.set_equivalent(0, Equivalent::Code(vec![])).unwrap();

        code.set_comment(3, "Start of loop");
        code.set_label(3, "loop");
        for offset in [3, 5, 6, 9, 10] {
            code.set_equivalent(offset, Equivalent::Code(vec![])).unwrap();
        }
        db
    }

    /// Checks cross-references in both directions and the full listing.
    #[test]
    fn test_db() {
        let db = make_test_db();

        let code_addr = |offset| PhysicalAddr {
            space: AddressSpace::Code,
            offset,
        };
        // The SJMP at offset 10 jumps back to the loop label at offset 3.
        let loop_jump = || Xref {
            xref_type: XrefType::Jump,
            from: code_addr(10),
            to: code_addr(3),
        };

        assert_eq!(db.xrefs_to(&code_addr(3)), vec![loop_jump()]);
        assert_eq!(db.xrefs_from(&code_addr(10)), vec![loop_jump()]);

        let expected = [
            ".area CODE (CODE,ABS)\n",
            ".org 0x0\n",
            "\n",
            "_start:\n",
            "    LJMP    #0x0010\n",
            "; Start of loop\n",
            "_loop:\n",
            "    MOV     A,#01\n",
            "    MOVX    @DPTR,A\n",
            "    MOV     DPTR,#0x0010\n",
            "    MOVC    A,@A+DPTR\n",
            "    SJMP    loop\n",
        ]
        .concat();
        assert_eq!(db.to_sdas(), expected);
    }

    /// Replaying `to_commands` into a fresh database reproduces the listing.
    #[test]
    fn test_db_to_commands() {
        let original = make_test_db();
        let env = TestEnvironment::new().with_file("test.bin", TEST_BINARY.to_vec());

        let mut replayed = Db::new();
        for command in original.to_commands() {
            // Only `MapBytes` commands are handed the environment.
            let command_env = if matches!(command, Command::MapBytes { .. }) {
                Some(&env as &dyn Environment)
            } else {
                None
            };
            replayed
                .apply(command, command_env)
                .expect("command should apply");
        }
        assert_eq!(replayed.to_sdas(), original.to_sdas());
    }

    /// Mapping a second file over the first can be undone.
    #[test]
    fn map_bytes_command_undo() {
        let env = TestEnvironment::new()
            .with_file("test.bin", vec![1, 2, 3])
            .with_file("other.bin", vec![4, 5]);
        let mut db = Db::new();

        let map_first = Command::map_bytes(AddressSpace::Code, 0, "test.bin", 0, 3);
        db.apply(map_first, Some(&env)).unwrap();
        assert_eq!(
            db.region(AddressSpace::Code).unwrap().bytes_at(0, 3),
            vec![1, 2, 3]
        );

        let map_second = Command::map_bytes(AddressSpace::Code, 0, "other.bin", 0, 2);
        let undo = db.apply(map_second, Some(&env)).unwrap();
        assert_eq!(
            db.region(AddressSpace::Code).unwrap().bytes_at(0, 2),
            vec![4, 5]
        );

        apply_all(&mut db, undo, &env);
        assert_eq!(
            db.region(AddressSpace::Code).unwrap().bytes_at(0, 3),
            vec![1, 2, 3]
        );
    }

    /// Clearing bytes in the middle of a mapping can be undone.
    #[test]
    fn clear_bytes_command_undo() {
        let env = TestEnvironment::new().with_file("test.bin", vec![1, 2, 3, 4, 5]);
        let mut db = Db::new();

        let map = Command::map_bytes(AddressSpace::Code, 0, "test.bin", 0, 5);
        db.apply(map, Some(&env)).unwrap();

        let clear = Command::clear_bytes(AddressSpace::Code, 1, 2);
        let undo = db.apply(clear, None).unwrap();
        assert_eq!(
            db.region(AddressSpace::Code).unwrap().bytes_at(0, 5),
            vec![1, 4, 5]
        );

        apply_all(&mut db, undo, &env);
        assert_eq!(
            db.region(AddressSpace::Code).unwrap().bytes_at(0, 5),
            vec![1, 2, 3, 4, 5]
        );
    }

    /// Overwriting bytes with a constant can be undone.
    #[test]
    fn set_constant_bytes_command_undo() {
        let env = TestEnvironment::new().with_file("test.bin", vec![1, 2, 3]);
        let mut db = Db::new();

        let map = Command::map_bytes(AddressSpace::Code, 0, "test.bin", 0, 3);
        db.apply(map, Some(&env)).unwrap();

        let fill = Command::set_constant_bytes(AddressSpace::Code, 0, 2, 0xFF);
        let undo = db.apply(fill, None).unwrap();
        assert_eq!(
            db.region(AddressSpace::Code).unwrap().bytes_at(0, 3),
            vec![0xFF, 0xFF, 3]
        );

        apply_all(&mut db, undo, &env);
        assert_eq!(
            db.region(AddressSpace::Code).unwrap().bytes_at(0, 3),
            vec![1, 2, 3]
        );
    }
}
524}