macro_asm_builder/
macro_asm_builder.rs

1/// A module defining the AsmNode enum, which is used to represent and process
2/// the assembly code.
3mod tree;
4
/// A module with functions to read text and convert it to an Inode with a
/// Source leaf for each line of source code.
7mod assembly_source;
8
9/// A module containing the functions used to import assembly files with the
10/// `@import` directive.
11mod import;
12
13/// A module used to register macros with the `@macro` directive and expand
14/// macros called in the source.
15mod macros;
16
17/// A module used to register raw values with the directives `@rawbytes` or
18/// `@constant`.
19mod raw;
20
21/// A module to check and expand `@align` directives.
22mod align;
23
24/// A module to register labels and link to them.
25mod label;
26
/// A module to register strings and transform them into raw bytes.
28mod strings;
29
30/// A module to register sections and put code in them.
31mod section;
32
33/// A module to register flat defines and math expressions.
34mod define;
35
36/// A mod with miscellaneous useful functions.
37pub mod utils;
38
39use crate::tree::*;
40use crate::tree::AsmNode::*;
41use crate::assembly_source::parse_source;
42use std::collections::HashMap;
43use macros::*;
44
/// The assembly code tree together with all the mutable context needed to
/// process it.
///
/// The lifetime `'a` is the lifetime of the two implementation-specific
/// callbacks borrowed by the assembler.
pub struct Assembler<'a> {
    /// The root element of the tree used to represent and process the assembly
    /// code.
    root: AsmNode,

    /// A map of all the macros, linking their names to their contents.
    macros: HashMap<String, Macro>,

    /// A map of all flat defines, linking their names to their contents.
    defines: HashMap<String, Vec<String>>,

    /// The number of bytes in a constant or address.
    wordsize: usize,

    /// The pattern that will be put as padding to replace `@align` directives.
    pub align_pattern: Vec<u8>,

    /// Address of the first instruction or first data in the resulting binary.
    pub start_address: usize,

    /// A function that expands implementation-specific macros. It takes the
    /// vector of tokens from a line of code and returns `Ok(None)` if no macro
    /// expansion is needed, `Ok(Some(txt))` to replace the line with `txt`, or
    /// `Err(txt)` to report an error message destined to the user.
    pub implementation_macro: &'a dyn Fn(&Vec<String>) -> Result<Option<String>, String>,

    /// A function used to compile assembly source code into machine code. It
    /// is implementation-specific. It takes the vector of tokens from a line
    /// of code and returns `Ok(bytes)` with the machine code or `Err(txt)` with
    /// an error message destined to the user. When this function is run, all
    /// code left in the tree should be raw assembly and not macros or
    /// directives. Thus, there is no way for this function to ignore code.
    pub micro_assembly: &'a dyn Fn(&Vec<String>) -> Result<Vec<u8>, String>,
}
81
82impl Assembler<'_> {
83    /// Takes some assembly code as input and set up the assembler state with it
84    pub fn from_text(text: &str) -> Self {
85        Self::from_named_text(text, "./__asm_init")
86    }
87
88    /// As from_text but the name is chosen
89    fn from_named_text(text: &str, name: &str) -> Self {
90        fn default_implementation_macro(_: &Vec<String>) -> Result<Option<String>, String> {
91            Ok(None)
92        }
93        fn default_micro_assembly(_: &Vec<String>) -> Result<Vec<u8>, String> {
94            Err("Micro assembly function should be given by the assembler implementation.".to_string())
95        }
96
97        Assembler {
98            root: parse_source(text, name),
99            macros: HashMap::new(),
100            defines: HashMap::new(),
101            wordsize: 0,
102            align_pattern: vec![0],
103            start_address: 0,
104            implementation_macro: &default_implementation_macro,
105            micro_assembly: &default_micro_assembly,
106        }
107    }
108
109    /// Initialize the assembler state to be ready to read the input file
110    pub fn from_file(path: &str) -> Self {
111        let mut import_directive = "@import \"".to_string();
112        import_directive.push_str(path);
113        import_directive.push_str("\"");
114        Self::from_text(&import_directive)
115    }
116
117    /// Set the wordsize of the assembler given it in bits. Return None if the
118    /// value is valid and an error message otherwise.
119    pub fn set_word_size(&mut self, wordsize: usize) -> Option<&str> {
120        match wordsize {
121            8 | 16 | 32 | 64 | 128 => {
122                self.wordsize = wordsize / 8;
123                None
124            },
125            x => {
126                if x > 128 {
127                    Some("Only word sizes of 128 bits or less are supported. This is not a limitation of the Reflet architecture but one of this assembler.")
128                } else {
129                    Some("Valid word sizes should be 8 bits times a power of two such as 8, 16, or 32.")
130                }
131            },
132        }
133    }
134
135    /// Add some text at the beginning of the tree.
136    pub fn add_text_before(&mut self, txt: &str, name: &str) {
137        match &mut self.root {
138            Inode(list) => {
139                list.insert(0, parse_source(txt, name));
140            },
141            _ => {
142                panic!("Assembler's root should have been an inode!");
143            },
144        }
145    }
146
147    /// Add some text at the end of the tree.
148    pub fn add_text_after(&mut self, txt: &str, name: &str) {
149        match &mut self.root {
150            Inode(list) => {
151                list.push(parse_source(txt, name));
152            },
153            _ => {
154                panic!("Assembler's root should have been an inode!");
155            },
156        }
157    }
158
159    /// Runs all passes of the assembly process that might add new text in the
160    /// tree.
161    fn run_text_adding_passes(&mut self) {
162        // All the operations that might add new text on the tree. If an
163        // operation added new text, we want to run them all from the start to
164        // ensure that we are not missing anything. It should end with the most
165        // costly operations as we don't want to rerun them too much time.
166        const TEXT_ADDING_PASSES: [&dyn Fn(&mut Assembler) -> bool; 5] = [
167            &import::include_source,
168            &macros::register_macros,
169            &run_implementation_macros,
170            &macros::expand_macros,
171            &define::handle_define,
172        ];
173
174        let mut pass_index = 0;
175        while pass_index < TEXT_ADDING_PASSES.len() {
176            if TEXT_ADDING_PASSES[pass_index](self) {
177                pass_index = 0;
178            } else {
179                pass_index += 1;
180            }
181        }
182    }
183
184    /// Perform a complete assembly process. Return a vector of bytes of the
185    /// resulting binary in case of success and an error message in case of
186    /// error.
187    pub fn assemble(&mut self) -> Result<Vec<u8>, String> {
188        // Manage macros and directives
189        self.run_text_adding_passes();
190        section::handle_sections(self);
191        label::register_labels(self);
192        align::register_align(self);
193        raw::expand_constants(self);
194        raw::decode_raw_bytes(self);
195        strings::register_strings(self);
196
197        // Run the micro-assembler
198        self.run_micro_assembler();
199
200        // Finish the linking and padding
201        align::expand_align(self);
202        label::expand_labels(self);
203        let ret = self.collect_raw();
204
205        // Return raw or report errors
206        match self.root.check_for_errors() {
207            Some(txt) => Err(txt),
208            None => Ok(ret),
209        }
210    }
211
212    /// Executes the implementation-specific micro-assembler.
213    fn run_micro_assembler(&mut self) {
214        let mut running_micro_assembler = | node: &AsmNode | -> Option<AsmNode> {
215            match node {
216                Source{code, meta} => match (self.micro_assembly)(code) {
217                    Err(msg) => Some(Error{msg, meta: meta.clone()}),
218                    Ok(raw) => Some(Raw(raw)),
219                },
220                _ => None,
221            }
222        };
223
224        self.root.traverse_tree(&mut running_micro_assembler);
225    }
226
    /// Returns a label dump of the tree, as produced by `label::label_dump`
    /// for this assembler.
    pub fn label_dump(&mut self) -> String {
        label::label_dump(self)
    }
231
232    /// Gather all the raw part of a tree and extract them. Make an error for
233    /// non-raw parts.
234    fn collect_raw(&mut self) -> Vec<u8> {
235        let mut ret: Vec<u8> = vec![];
236        let mut collecting_raw = | node: &AsmNode | -> Option<AsmNode> {
237            match node {
238                Raw(data) => {
239                    ret.extend(data);
240                    None
241                },
242                Error{msg: _, meta: _} => None,
243                Label{name: _, is_definition: true, meta: _} => None,
244                x => Some(Error{msg: format!("There is a bug in the assembler, the node {} should not be left over in collect_raw.", &x.to_string()), meta: Metadata{line: !0, raw: "!!!".to_string(), source_file: "!!!".to_string()}}),
245            }
246        };
247
248        self.root.traverse_tree(&mut collecting_raw);
249        ret
250    }
251
252    /// From a number, convert it into a stream of bytes of the size required by
253    /// the processor's wordsize. Return None if the number is too big to be
254    /// represented.
255    fn format_number(&self, number: u128) -> Option<Vec<u8>> {
256        let mut n = number;
257        let mut ret = vec![];
258        for _i in 0..self.wordsize {
259            ret.push((n & 0xFF) as u8);
260            n = n >> 8
261        }
262        if n == 0 {
263            Some(ret)
264        } else {
265            None
266        }
267    }
268
269    /// Tries to convert the string representation of a number into a stream of
270    /// bytes.
271    fn format_string_into_number(&self, s: &str) -> Option<Vec<u8>> {
272        match utils::format_string_into_number(s) {
273            Some((num, false)) => self.format_number(num),
274            Some((num, true)) => {
275                let mut num_shorten = num;
276                for i in self.wordsize..(128/8) {
277                    // Ensure that there is no data bits above the wordsize
278                    let mask: u128 = (0xFF << (i * 8)) >> 1;
279                    if ((num & mask) << 1) >> (i * 8) != 0xFF {
280                        return None
281                    }
282                    // Remove sign extenton bits
283                    num_shorten = num_shorten & !(0xFF << (i*8));
284                }
285                self.format_number(num_shorten)
286            },
287            None => None,
288        }
289    }
290}
291
292/// Executes the implementation-specific macros. Return true if some macros
293/// have been expanded.
294fn run_implementation_macros(asm: &mut Assembler) -> bool {
295    let mut expanded_macros = false;
296    let mut running_implementation_macros = | node: &AsmNode | -> Option<AsmNode> {
297        match node {
298            Source{code, meta} => match (asm.implementation_macro)(code) {
299                Err(msg) => Some(Error{msg, meta: meta.clone()}),
300                Ok(None) => None,
301                Ok(Some(txt)) => {
302                    expanded_macros = true;
303                    Some(parse_source(&txt, &format!("Expantion of line {} from file {}, being {}", meta.line, &meta.source_file, &meta.raw)))
304                },
305            },
306            _ => None,
307        }
308    };
309
310    asm.root.traverse_tree(&mut running_implementation_macros);
311    expanded_macros
312}
313
314
315
316/* --------------------------------- Testing -------------------------------- */
#[test]
fn test_set_word_size() {
    let mut asm = Assembler::from_text("");

    // Sizes that must be accepted.
    for &accepted in [8, 128].iter() {
        assert_eq!(asm.set_word_size(accepted), None);
    }
    // Too big, not a multiple of 8, and zero must all be rejected.
    for &rejected in [256, 63, 0].iter() {
        assert_ne!(asm.set_word_size(rejected), None);
    }
}
326
#[test]
fn test_format_number() {
    let mut asm = Assembler::from_text("");

    // 8-bit words: one byte per number.
    assert_eq!(asm.set_word_size(8), None);
    let one_byte_cases: Vec<(u128, Option<Vec<u8>>)> = vec![
        (12, Some(vec![12])),
        (250, Some(vec![250])),
        (260, None),
    ];
    for (number, expected) in one_byte_cases {
        assert_eq!(asm.format_number(number), expected);
    }

    // 16-bit words: two bytes per number, least significant first.
    assert_eq!(asm.set_word_size(16), None);
    let two_bytes_cases: Vec<(u128, Option<Vec<u8>>)> = vec![
        (12, Some(vec![12, 0])),
        (250, Some(vec![250, 0])),
        (260, Some(vec![4, 1])),
        (0x10000, None),
    ];
    for (number, expected) in two_bytes_cases {
        assert_eq!(asm.format_number(number), expected);
    }
}
340
#[test]
fn format_string_into_number() {
    let mut asm = Assembler::from_text("");

    // 8-bit words.
    assert_eq!(asm.set_word_size(8), None);
    let one_byte_cases: Vec<(&str, Option<Vec<u8>>)> = vec![
        ("12", Some(vec![12])),
        ("250", Some(vec![250])),
        ("260", None),
        ("-129", None),
        ("-10", Some(vec![0xF6])),
    ];
    for (text, expected) in one_byte_cases {
        assert_eq!(asm.format_string_into_number(text), expected);
    }

    // 16-bit words.
    assert_eq!(asm.set_word_size(16), None);
    let two_bytes_cases: Vec<(&str, Option<Vec<u8>>)> = vec![
        ("12", Some(vec![12, 0])),
        ("250", Some(vec![250, 0])),
        ("260", Some(vec![4, 1])),
        ("0x10000", None),
        ("Potate", None),
        ("0xFFF", Some(vec![0xFF, 0xF])),
        ("-10", Some(vec![0xF6, 0xFF])),
    ];
    for (text, expected) in two_bytes_cases {
        assert_eq!(asm.format_string_into_number(text), expected);
    }
}
359