macro_asm_builder/
macro_asm_builder.rs

1#![allow(clippy::type_complexity)]
2
3/// A module defining the AsmNode enum, which is used to represent and process
4/// the assembly code.
5mod tree;
6
/// A module with functions to read text and convert it into an Inode with a
/// Source leaf for each line of source code.
9mod assembly_source;
10
11/// A module containing the functions used to import assembly files with the
12/// `@import` directive.
13mod import;
14
15/// A module used to register macros with the `@macro` directive and expand
16/// macros called in the source.
17mod macros;
18
19/// A module used to register raw values with the directives `@rawbytes` or
20/// `@constant`.
21mod raw;
22
23/// A module to check and expand `@align` directives.
24mod align;
25
26/// A module to register labels and link to them.
27mod label;
28
/// A module to register strings and transform them into raw bytes.
30mod strings;
31
32/// A module to register sections and put code in them.
33mod section;
34
35/// A module to register flat defines and math expressions.
36mod define;
37
38/// A mod with miscellaneous useful functions.
39pub mod utils;
40
41use crate::tree::*;
42use crate::tree::AsmNode::*;
43use crate::assembly_source::parse_source;
44use std::collections::HashMap;
45use macros::*;
46
47/// The collection of the assembly code tree and all the mutable context needed
48/// to process it
49pub struct Assembler<'a> {
50    /// The root element of the tree used to represent and process the assembly
51    /// code
52    root: AsmNode,
53
54    /// A map of all the macros linking their names and number of arguments to
55    /// their contents
56    macros: HashMap<MacroID, String>,
57
58    /// Count of how many time we expanded macros. Needed for expansion-unique
59    /// symbols
60    macro_expansion_count: usize,
61
62    /// A map of all flat defines linking their names to their contents
63    defines: HashMap<String, Vec<String>>,
64
65    /// The number of bytes in a constant or address
66    wordsize: usize,
67
68    /// The pattern that will be put as padding to replace `@align` directives
69    pub align_pattern: Vec<u8>,
70
71    /// Address of the first instruction of first data in the resulting binary
72    pub start_address: usize,
73
74    /// A function that expand implementation-specifics macros. Takes a vector
75    /// of tokens from a line of code and return Ok<None> if no macro expansion
76    /// is needed, Ok<txt> to replace the text with txt, or Err<txt> to return
77    /// an error message destined to the user.
78    pub implementation_macro: &'a dyn Fn(&Vec<String>) -> Result<Option<String>, String>,
79
80    /// A function used to compile assembly source code into machine code. It
81    /// is implementation-specific. It takes a vector if tokens from a line of
82    /// code and return Ok<bytes> to return the machine code or Err<txt> to
83    /// return an error message destined to the user. When this function is run,
84    /// all code left in the tree should be raw assembly and not macros or
85    /// directive. Thus, there is no way this function can ignore code.
86    pub micro_assembly: &'a dyn Fn(&Vec<String>) -> Result<Vec<u8>, String>,
87}
88
89impl Assembler<'_> {
90    /// Takes some assembly code as input and set up the assembler state with it
91    pub fn from_text(text: &str) -> Self {
92        Self::from_named_text(text, "./__asm_init")
93    }
94
95    /// As from_text but the name is chosen
96    fn from_named_text(text: &str, name: &str) -> Self {
97        fn default_implementation_macro(_: &Vec<String>) -> Result<Option<String>, String> {
98            Ok(None)
99        }
100        fn default_micro_assembly(_: &Vec<String>) -> Result<Vec<u8>, String> {
101            Err("Micro assembly function should be given by the assembler implementation.".to_string())
102        }
103
104        Assembler {
105            root: parse_source(text, name),
106            macros: HashMap::new(),
107            defines: HashMap::new(),
108            wordsize: 0,
109            align_pattern: vec![0],
110            start_address: 0,
111            implementation_macro: &default_implementation_macro,
112            micro_assembly: &default_micro_assembly,
113            macro_expansion_count: 0,
114        }
115    }
116
117    /// Initialize the assembler state to be ready to read the input file
118    pub fn from_file(path: &str) -> Self {
119        let mut import_directive = "@import \"".to_string();
120        import_directive.push_str(path);
121        import_directive.push('\"');
122        Self::from_text(&import_directive)
123    }
124
125    /// Set the wordsize of the assembler given it in bits. Return None if the
126    /// value is valid and an error message otherwise.
127    pub fn set_word_size(&mut self, wordsize: usize) -> Option<&str> {
128        match wordsize {
129            8 | 16 | 32 | 64 | 128 => {
130                self.wordsize = wordsize / 8;
131                None
132            },
133            x => {
134                if x > 128 {
135                    Some("Only word sizes of 128 bits or less are supported. This is not a limitation of the Reflet architecture but one of this assembler.")
136                } else {
137                    Some("Valid word sizes should be 8 bits times a power of two such as 8, 16, or 32.")
138                }
139            },
140        }
141    }
142
143    /// Add some text at the beginning of the tree.
144    pub fn add_text_before(&mut self, txt: &str, name: &str) {
145        match &mut self.root {
146            Inode(list) => {
147                list.insert(0, parse_source(txt, name));
148            },
149            _ => {
150                panic!("Assembler's root should have been an inode!");
151            },
152        }
153    }
154
155    /// Add some text at the end of the tree.
156    pub fn add_text_after(&mut self, txt: &str, name: &str) {
157        match &mut self.root {
158            Inode(list) => {
159                list.push(parse_source(txt, name));
160            },
161            _ => {
162                panic!("Assembler's root should have been an inode!");
163            },
164        }
165    }
166
167    /// Runs all passes of the assembly process that might add new text in the
168    /// tree.
169    fn run_text_adding_passes(&mut self) {
170        // All the operations that might add new text on the tree. If an
171        // operation added new text, we want to run them all from the start to
172        // ensure that we are not missing anything. It should end with the most
173        // costly operations as we don't want to rerun them too much time.
174        const TEXT_ADDING_PASSES: [&dyn Fn(&mut Assembler) -> bool; 5] = [
175            &import::include_source,
176            &macros::register_macros,
177            &define::handle_define,
178            &run_implementation_macros,
179            &macros::expand_macros,
180        ];
181
182        let mut pass_index = 0;
183        while pass_index < TEXT_ADDING_PASSES.len() {
184            if TEXT_ADDING_PASSES[pass_index](self) {
185                pass_index = 0;
186            } else {
187                pass_index += 1;
188            }
189        }
190    }
191
192    /// Perform a complete assembly process. Return a vector of bytes of the
193    /// resulting binary in case of success and an error message in case of
194    /// error.
195    pub fn assemble(&mut self) -> Result<Vec<u8>, String> {
196        // Manage macros and directives
197        self.run_text_adding_passes();
198        section::handle_sections(self);
199        label::register_labels(self);
200        align::register_align(self);
201        raw::expand_constants(self);
202        raw::decode_raw_bytes(self);
203        strings::register_strings(self);
204
205        // Run the micro-assembler
206        self.run_micro_assembler();
207
208        // Finish the linking and padding
209        align::expand_align(self);
210        label::expand_labels(self);
211        let ret = self.collect_raw();
212
213        // Return raw or report errors
214        match self.root.check_for_errors() {
215            Some(txt) => Err(txt),
216            None => Ok(ret),
217        }
218    }
219
220    /// Executes the implementation-specific micro-assembler.
221    fn run_micro_assembler(&mut self) {
222        let mut running_micro_assembler = | node: &AsmNode | -> Option<AsmNode> {
223            match node {
224                Source{code, meta} => match (self.micro_assembly)(code) {
225                    Err(msg) => Some(Error{msg, meta: meta.clone()}),
226                    Ok(raw) => Some(Raw(raw)),
227                },
228                _ => None,
229            }
230        };
231
232        self.root.traverse_tree(&mut running_micro_assembler);
233    }
234
235    /// Return a label dump of the tree
236    pub fn label_dump(&mut self) -> String {
237        label::label_dump(self)
238    }
239
240    /// Gather all the raw part of a tree and extract them. Make an error for
241    /// non-raw parts.
242    fn collect_raw(&mut self) -> Vec<u8> {
243        let mut ret: Vec<u8> = vec![];
244        let mut collecting_raw = | node: &AsmNode | -> Option<AsmNode> {
245            match node {
246                Raw(data) => {
247                    ret.extend(data);
248                    None
249                },
250                Error{msg: _, meta: _} => None,
251                Label{name: _, is_definition: true, meta: _} => None,
252                x => Some(Error{msg: format!("There is a bug in the assembler, the node {} should not be left over in collect_raw.", &x.to_string()), meta: Metadata{line: !0, raw: "!!!".to_string(), source_file: "!!!".to_string()}}),
253            }
254        };
255
256        self.root.traverse_tree(&mut collecting_raw);
257        ret
258    }
259
260    /// From a number, convert it into a stream of bytes of the size required by
261    /// the processor's wordsize. Return None if the number is too big to be
262    /// represented.
263    fn format_number(&self, number: u128) -> Option<Vec<u8>> {
264        let mut n = number;
265        let mut ret = vec![];
266        for _i in 0..self.wordsize {
267            ret.push((n & 0xFF) as u8);
268            n >>= 8
269        }
270        if n == 0 {
271            Some(ret)
272        } else {
273            None
274        }
275    }
276
277    /// Tries to convert the string representation of a number into a stream of
278    /// bytes.
279    fn format_string_into_number(&self, s: &str) -> Option<Vec<u8>> {
280        match utils::format_string_into_number(s) {
281            Some((num, false)) => self.format_number(num),
282            Some((num, true)) => {
283                let mut num_shorten = num;
284                for i in self.wordsize..(128/8) {
285                    // Ensure that there is no data bits above the wordsize
286                    let mask: u128 = (0xFF << (i * 8)) >> 1;
287                    if ((num & mask) << 1) >> (i * 8) != 0xFF {
288                        return None
289                    }
290                    // Remove sign extenton bits
291                    num_shorten &= !(0xFF << (i*8));
292                }
293                self.format_number(num_shorten)
294            },
295            None => None,
296        }
297    }
298}
299
300/// Executes the implementation-specific macros. Return true if some macros
301/// have been expanded.
302fn run_implementation_macros(asm: &mut Assembler) -> bool {
303    let mut expanded_macros = false;
304    let mut running_implementation_macros = | node: &AsmNode | -> Option<AsmNode> {
305        match node {
306            Source{code, meta} => match (asm.implementation_macro)(code) {
307                Err(msg) => Some(Error{msg, meta: meta.clone()}),
308                Ok(None) => None,
309                Ok(Some(txt)) => {
310                    expanded_macros = true;
311                    Some(parse_source(&txt, &format!("Expantion of line {} from file {}, being {}", meta.line, &meta.source_file, &meta.raw)))
312                },
313            },
314            _ => None,
315        }
316    };
317
318    asm.root.traverse_tree(&mut running_implementation_macros);
319    expanded_macros
320}
321
322
323
324/* --------------------------------- Testing -------------------------------- */
/// Word sizes of 8 and 128 bits are accepted; 256, 63 and 0 are rejected.
#[test]
fn test_set_word_size() {
    let mut asm = Assembler::from_text("");

    for accepted in [8, 128] {
        assert_eq!(asm.set_word_size(accepted), None);
    }
    for rejected in [256, 63, 0] {
        assert_ne!(asm.set_word_size(rejected), None);
    }
}
334
/// Numbers are emitted least significant byte first on exactly one word, and
/// rejected when they do not fit.
#[test]
fn test_format_number() {
    let mut asm = Assembler::from_text("");

    // 8-bit words.
    assert_eq!(asm.set_word_size(8), None);
    let byte_cases = [
        (12, Some(vec![12])),
        (250, Some(vec![250])),
        (260, None),
    ];
    for (number, expected) in byte_cases {
        assert_eq!(asm.format_number(number), expected);
    }

    // 16-bit words.
    assert_eq!(asm.set_word_size(16), None);
    let word_cases = [
        (12, Some(vec![12, 0])),
        (250, Some(vec![250, 0])),
        (260, Some(vec![4, 1])),
        (0x10000, None),
    ];
    for (number, expected) in word_cases {
        assert_eq!(asm.format_number(number), expected);
    }
}
348
/// Textual numbers, including negative and hexadecimal ones, are converted to
/// one word of bytes, and rejected when unparsable or out of range.
#[test]
fn format_string_into_number() {
    let mut asm = Assembler::from_text("");

    // 8-bit words.
    assert_eq!(asm.set_word_size(8), None);
    let byte_cases = [
        ("12", Some(vec![12])),
        ("250", Some(vec![250])),
        ("260", None),
        ("-129", None),
        ("-10", Some(vec![0xF6])),
    ];
    for (text, expected) in byte_cases {
        assert_eq!(asm.format_string_into_number(text), expected);
    }

    // 16-bit words.
    assert_eq!(asm.set_word_size(16), None);
    let word_cases = [
        ("12", Some(vec![12, 0])),
        ("250", Some(vec![250, 0])),
        ("260", Some(vec![4, 1])),
        ("0x10000", None),
        ("Potate", None),
        ("0xFFF", Some(vec![0xFF, 0xF])),
        ("-10", Some(vec![0xF6, 0xFF])),
    ];
    for (text, expected) in word_cases {
        assert_eq!(asm.format_string_into_number(text), expected);
    }
}
367