macro_asm_builder/
macro_asm_builder.rs

1/// A module defining the AsmNode enum, which is used to represent and process
2/// the assembly code.
3mod tree;
4
/// A module with functions to read text and convert it to an Inode with a
/// Source leaf for each line of source code.
7mod assembly_source;
8
9/// A module containing the functions used to import assembly files with the
10/// `@import` directive.
11mod import;
12
13/// A module used to register macros with the `@macro` directive and expand
14/// macros called in the source.
15mod macros;
16
17/// A module used to register raw values with the directives `@rawbytes` or
18/// `@constant`.
19mod raw;
20
21/// A module to check and expand `@align` directives.
22mod align;
23
24/// A module to register labels and link to them.
25mod label;
26
/// A module to register strings and transform them into raw bytes.
28mod strings;
29
30/// A module to register sections and put code in them.
31mod section;
32
33/// A module to register flat defines and math expressions.
34mod define;
35
/// A module with miscellaneous useful functions.
37pub mod utils;
38
39use crate::tree::*;
40use crate::tree::AsmNode::*;
41use crate::assembly_source::parse_source;
42use std::collections::HashMap;
43use macros::*;
44
45/// The collection of the assembly code tree and all the mutable context needed
46/// to process it
47pub struct Assembler<'a> {
48    /// The root element of the tree used to represent and process the assembly
49    /// code
50    root: AsmNode,
51
52    /// A map of all the macros linking their names and number of arguments to
53    /// their contents
54    macros: HashMap<MacroID, String>,
55
56    /// A map of all flat defines linking their names to their contents
57    defines: HashMap<String, Vec<String>>,
58
59    /// The number of bytes in a constant or address
60    wordsize: usize,
61
62    /// The pattern that will be put as padding to replace `@align` directives
63    pub align_pattern: Vec<u8>,
64
65    /// Address of the first instruction of first data in the resulting binary
66    pub start_address: usize,
67
68    /// A function that expand implementation-specifics macros. Takes a vector
69    /// of tokens from a line of code and return Ok<None> if no macro expansion
70    /// is needed, Ok<txt> to replace the text with txt, or Err<txt> to return
71    /// an error message destined to the user.
72    pub implementation_macro: &'a dyn Fn(&Vec<String>) -> Result<Option<String>, String>,
73
74    /// A function used to compile assembly source code into machine code. It
75    /// is implementation-specific. It takes a vector if tokens from a line of
76    /// code and return Ok<bytes> to return the machine code or Err<txt> to
77    /// return an error message destined to the user. When this function is run,
78    /// all code left in the tree should be raw assembly and not macros or
79    /// directive. Thus, there is no way this function can ignore code.
80    pub micro_assembly: &'a dyn Fn(&Vec<String>) -> Result<Vec<u8>, String>,
81}
82
83impl Assembler<'_> {
84    /// Takes some assembly code as input and set up the assembler state with it
85    pub fn from_text(text: &str) -> Self {
86        Self::from_named_text(text, "./__asm_init")
87    }
88
89    /// As from_text but the name is chosen
90    fn from_named_text(text: &str, name: &str) -> Self {
91        fn default_implementation_macro(_: &Vec<String>) -> Result<Option<String>, String> {
92            Ok(None)
93        }
94        fn default_micro_assembly(_: &Vec<String>) -> Result<Vec<u8>, String> {
95            Err("Micro assembly function should be given by the assembler implementation.".to_string())
96        }
97
98        Assembler {
99            root: parse_source(text, name),
100            macros: HashMap::new(),
101            defines: HashMap::new(),
102            wordsize: 0,
103            align_pattern: vec![0],
104            start_address: 0,
105            implementation_macro: &default_implementation_macro,
106            micro_assembly: &default_micro_assembly,
107        }
108    }
109
110    /// Initialize the assembler state to be ready to read the input file
111    pub fn from_file(path: &str) -> Self {
112        let mut import_directive = "@import \"".to_string();
113        import_directive.push_str(path);
114        import_directive.push_str("\"");
115        Self::from_text(&import_directive)
116    }
117
118    /// Set the wordsize of the assembler given it in bits. Return None if the
119    /// value is valid and an error message otherwise.
120    pub fn set_word_size(&mut self, wordsize: usize) -> Option<&str> {
121        match wordsize {
122            8 | 16 | 32 | 64 | 128 => {
123                self.wordsize = wordsize / 8;
124                None
125            },
126            x => {
127                if x > 128 {
128                    Some("Only word sizes of 128 bits or less are supported. This is not a limitation of the Reflet architecture but one of this assembler.")
129                } else {
130                    Some("Valid word sizes should be 8 bits times a power of two such as 8, 16, or 32.")
131                }
132            },
133        }
134    }
135
136    /// Add some text at the beginning of the tree.
137    pub fn add_text_before(&mut self, txt: &str, name: &str) {
138        match &mut self.root {
139            Inode(list) => {
140                list.insert(0, parse_source(txt, name));
141            },
142            _ => {
143                panic!("Assembler's root should have been an inode!");
144            },
145        }
146    }
147
148    /// Add some text at the end of the tree.
149    pub fn add_text_after(&mut self, txt: &str, name: &str) {
150        match &mut self.root {
151            Inode(list) => {
152                list.push(parse_source(txt, name));
153            },
154            _ => {
155                panic!("Assembler's root should have been an inode!");
156            },
157        }
158    }
159
160    /// Runs all passes of the assembly process that might add new text in the
161    /// tree.
162    fn run_text_adding_passes(&mut self) {
163        // All the operations that might add new text on the tree. If an
164        // operation added new text, we want to run them all from the start to
165        // ensure that we are not missing anything. It should end with the most
166        // costly operations as we don't want to rerun them too much time.
167        const TEXT_ADDING_PASSES: [&dyn Fn(&mut Assembler) -> bool; 5] = [
168            &import::include_source,
169            &macros::register_macros,
170            &run_implementation_macros,
171            &macros::expand_macros,
172            &define::handle_define,
173        ];
174
175        let mut pass_index = 0;
176        while pass_index < TEXT_ADDING_PASSES.len() {
177            if TEXT_ADDING_PASSES[pass_index](self) {
178                pass_index = 0;
179            } else {
180                pass_index += 1;
181            }
182        }
183    }
184
185    /// Perform a complete assembly process. Return a vector of bytes of the
186    /// resulting binary in case of success and an error message in case of
187    /// error.
188    pub fn assemble(&mut self) -> Result<Vec<u8>, String> {
189        // Manage macros and directives
190        self.run_text_adding_passes();
191        section::handle_sections(self);
192        label::register_labels(self);
193        align::register_align(self);
194        raw::expand_constants(self);
195        raw::decode_raw_bytes(self);
196        strings::register_strings(self);
197
198        // Run the micro-assembler
199        self.run_micro_assembler();
200
201        // Finish the linking and padding
202        align::expand_align(self);
203        label::expand_labels(self);
204        let ret = self.collect_raw();
205
206        // Return raw or report errors
207        match self.root.check_for_errors() {
208            Some(txt) => Err(txt),
209            None => Ok(ret),
210        }
211    }
212
213    /// Executes the implementation-specific micro-assembler.
214    fn run_micro_assembler(&mut self) {
215        let mut running_micro_assembler = | node: &AsmNode | -> Option<AsmNode> {
216            match node {
217                Source{code, meta} => match (self.micro_assembly)(code) {
218                    Err(msg) => Some(Error{msg, meta: meta.clone()}),
219                    Ok(raw) => Some(Raw(raw)),
220                },
221                _ => None,
222            }
223        };
224
225        self.root.traverse_tree(&mut running_micro_assembler);
226    }
227
228    /// Return a label dump of the tree
229    pub fn label_dump(&mut self) -> String {
230        label::label_dump(self)
231    }
232
233    /// Gather all the raw part of a tree and extract them. Make an error for
234    /// non-raw parts.
235    fn collect_raw(&mut self) -> Vec<u8> {
236        let mut ret: Vec<u8> = vec![];
237        let mut collecting_raw = | node: &AsmNode | -> Option<AsmNode> {
238            match node {
239                Raw(data) => {
240                    ret.extend(data);
241                    None
242                },
243                Error{msg: _, meta: _} => None,
244                Label{name: _, is_definition: true, meta: _} => None,
245                x => Some(Error{msg: format!("There is a bug in the assembler, the node {} should not be left over in collect_raw.", &x.to_string()), meta: Metadata{line: !0, raw: "!!!".to_string(), source_file: "!!!".to_string()}}),
246            }
247        };
248
249        self.root.traverse_tree(&mut collecting_raw);
250        ret
251    }
252
253    /// From a number, convert it into a stream of bytes of the size required by
254    /// the processor's wordsize. Return None if the number is too big to be
255    /// represented.
256    fn format_number(&self, number: u128) -> Option<Vec<u8>> {
257        let mut n = number;
258        let mut ret = vec![];
259        for _i in 0..self.wordsize {
260            ret.push((n & 0xFF) as u8);
261            n = n >> 8
262        }
263        if n == 0 {
264            Some(ret)
265        } else {
266            None
267        }
268    }
269
270    /// Tries to convert the string representation of a number into a stream of
271    /// bytes.
272    fn format_string_into_number(&self, s: &str) -> Option<Vec<u8>> {
273        match utils::format_string_into_number(s) {
274            Some((num, false)) => self.format_number(num),
275            Some((num, true)) => {
276                let mut num_shorten = num;
277                for i in self.wordsize..(128/8) {
278                    // Ensure that there is no data bits above the wordsize
279                    let mask: u128 = (0xFF << (i * 8)) >> 1;
280                    if ((num & mask) << 1) >> (i * 8) != 0xFF {
281                        return None
282                    }
283                    // Remove sign extenton bits
284                    num_shorten = num_shorten & !(0xFF << (i*8));
285                }
286                self.format_number(num_shorten)
287            },
288            None => None,
289        }
290    }
291}
292
293/// Executes the implementation-specific macros. Return true if some macros
294/// have been expanded.
295fn run_implementation_macros(asm: &mut Assembler) -> bool {
296    let mut expanded_macros = false;
297    let mut running_implementation_macros = | node: &AsmNode | -> Option<AsmNode> {
298        match node {
299            Source{code, meta} => match (asm.implementation_macro)(code) {
300                Err(msg) => Some(Error{msg, meta: meta.clone()}),
301                Ok(None) => None,
302                Ok(Some(txt)) => {
303                    expanded_macros = true;
304                    Some(parse_source(&txt, &format!("Expantion of line {} from file {}, being {}", meta.line, &meta.source_file, &meta.raw)))
305                },
306            },
307            _ => None,
308        }
309    };
310
311    asm.root.traverse_tree(&mut running_implementation_macros);
312    expanded_macros
313}
314
315
316
317/* --------------------------------- Testing -------------------------------- */
/// Word sizes of 8 and 128 bits are accepted; 256, 63 and 0 are rejected.
#[test]
fn test_set_word_size() {
    let mut asm = Assembler::from_text("");
    for &accepted in &[8, 128] {
        assert_eq!(asm.set_word_size(accepted), None);
    }
    for &rejected in &[256, 63, 0] {
        assert_ne!(asm.set_word_size(rejected), None);
    }
}
327
/// Numbers are formatted least significant byte first and rejected when they
/// do not fit in the word, for 8-bit then 16-bit words.
#[test]
fn test_format_number() {
    let mut asm = Assembler::from_text("");

    assert_eq!(asm.set_word_size(8), None);
    let one_byte_cases: [(u128, Option<Vec<u8>>); 3] = [
        (12, Some(vec![12])),
        (250, Some(vec![250])),
        (260, None),
    ];
    for (number, expected) in one_byte_cases.iter() {
        assert_eq!(asm.format_number(*number), *expected);
    }

    assert_eq!(asm.set_word_size(16), None);
    let two_byte_cases: [(u128, Option<Vec<u8>>); 4] = [
        (12, Some(vec![12, 0])),
        (250, Some(vec![250, 0])),
        (260, Some(vec![4, 1])),
        (0x10000, None),
    ];
    for (number, expected) in two_byte_cases.iter() {
        assert_eq!(asm.format_number(*number), *expected);
    }
}
341
/// Textual numbers (decimal, hexadecimal, negative) are converted to bytes and
/// rejected when malformed or out of range, for 8-bit then 16-bit words.
#[test]
fn format_string_into_number() {
    let mut asm = Assembler::from_text("");

    assert_eq!(asm.set_word_size(8), None);
    let one_byte_cases: [(&str, Option<Vec<u8>>); 5] = [
        ("12", Some(vec![12])),
        ("250", Some(vec![250])),
        ("260", None),
        ("-129", None),
        ("-10", Some(vec![0xF6])),
    ];
    for (text, expected) in one_byte_cases.iter() {
        assert_eq!(asm.format_string_into_number(text), *expected);
    }

    assert_eq!(asm.set_word_size(16), None);
    let two_byte_cases: [(&str, Option<Vec<u8>>); 7] = [
        ("12", Some(vec![12, 0])),
        ("250", Some(vec![250, 0])),
        ("260", Some(vec![4, 1])),
        ("0x10000", None),
        ("Potate", None),
        ("0xFFF", Some(vec![0xFF, 0xF])),
        ("-10", Some(vec![0xF6, 0xFF])),
    ];
    for (text, expected) in two_byte_cases.iter() {
        assert_eq!(asm.format_string_into_number(text), *expected);
    }
}
360