maikor_asm_parser/
lib.rs

1mod arg_matching;
2mod arg_patterns;
3mod interpreter;
4mod ops;
5mod parsers;
6
7use crate::arg_matching::{arg_list_to_letters, get_op_code};
8use crate::interpreter::interpret_line;
9use crate::parsers::parse_argument;
10use thiserror::Error;
11
12#[derive(Error, Debug)]
13pub enum ParseError {
14    #[error("Line was empty (internal parser error)")]
15    EmptyLine,
16    #[error("Unable to parse line {0}: {1} ({2})")]
17    General(usize, String, String),
18    #[error("Invalid Address format {2}: {1} on line {0}, must be $x0 - $xFFFF")]
19    AddressHexFormat(usize, String, String),
20    #[error("Invalid Address format {2}: {1} on line {0}, must be $0 - $65535")]
21    AddressNumFormat(usize, String, String),
22    #[error(
23        "Address out outside of valid range {1} on line {0}, must be less than 65535 or xFFFF"
24    )]
25    AddressTooBig(usize, String),
26    #[error("Invalid Number literal format {2}: {1} on line {0}, must be 0 - 65535")]
27    NumberFormat(usize, String, String),
28    #[error("Invalid Number literal format {2}: {1} on line {0}, must be x0 - xFFFF")]
29    NumberHexFormat(usize, String, String),
30    #[error("Number literal out outside of valid range {1} on line {0}, must be less than 65535 or xFFFF")]
31    NumberTooBig(usize, String),
32    #[error("Register has invalid format {1} on line {0}, expected {2}")]
33    InvalidRegister(usize, String, String),
34    #[error("Invalid Number literal format {2}, {1} on line {0}, must be -32768 to 32767")]
35    SignedNumberNumFormat(usize, String, String),
36    #[error("Invalid Number literal format {1} on line {0}, must be -32768 to 32767")]
37    SignedNumberNumRange(usize, String),
38    #[error("This instruction only supports byte (0-255), was {1} on line {0}")]
39    NumberMustBeByte(usize, String),
40    #[error("Instruction unknown/unsupported: {1} {1:02X} on line {0}")]
41    InvalidOpCode(usize, u8),
42    #[error("Arguments {1} don't match instruction {2} (line {0}), supported: {3}")]
43    InvalidArguments(usize, String, String, String),
44    #[error("{1} (line {0}) requires arguments, supported: {2}")]
45    MissingArguments(usize, String, String),
46    #[error("No op found named '{1}', maybe you're missing the size? ('.B' or '.W') on line {0}")]
47    InvalidOpName(usize, String),
48    #[error(
49        "Invalid character literal {1}, must be one ASCII character in single quotes on line {0}"
50    )]
51    InvalidCharacter(usize, String),
52    #[error("Couldn't parse number or register for offset {1} on line {0}")]
53    InvalidOffset(usize, String),
54}
55
56impl ParseError {
57    fn num_to_addr(self) -> Self {
58        match self {
59            ParseError::NumberFormat(line_num, msg, err) => {
60                ParseError::AddressNumFormat(line_num, msg, err)
61            }
62            ParseError::NumberHexFormat(line_num, msg, err) => {
63                ParseError::AddressHexFormat(line_num, msg, err)
64            }
65            ParseError::NumberTooBig(line_num, msg) => ParseError::AddressTooBig(line_num, msg),
66            _ => self,
67        }
68    }
69}
70
/// One source line after interpretation, before it is turned into bytes.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Line {
    /// Line number this line was interpreted with (`parse_line_from_str` uses 0).
    pub num: usize,
    /// Source text of the line; in this file's tests it equals the input string.
    pub original: String,
    /// Label attached to the line, if any (presumably a jump target; confirm in the
    /// interpreter module).
    pub label: Option<String>,
    /// Instruction name and its argument strings, if the line holds an instruction.
    pub command: Option<(String, Vec<String>)>,
}
78
79impl Line {
80    fn new(num: usize, original: String) -> Self {
81        Self {
82            num,
83            original,
84            label: None,
85            command: None,
86        }
87    }
88}
89
/// Result of parsing a whole program with `parse_program`.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct Program {
    /// Every line that was parsed, in source order (blank and comment-only lines
    /// are not included).
    pub lines: Vec<ParsedLine>,
    /// The bytes of all `lines` concatenated in order.
    pub bytes: Vec<u8>,
}
95
/// A single interpreted line together with the bytes generated for it.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct ParsedLine {
    /// The interpreted source line.
    pub line: Line,
    /// Bytes emitted for the line: the op code, then the argument bytes, then the
    /// argument offset bytes. Empty when the line has no command.
    pub bytes: Vec<u8>,
}
101
102pub fn parse_program(lines: &[&str]) -> Result<Program, ParseError> {
103    let mut output = vec![];
104    for (idx, line) in lines.iter().enumerate() {
105        let trimmed = line.trim();
106        let (trimmed, _) = trimmed.split_once('#').unwrap_or((trimmed, ""));
107        if !trimmed.is_empty() {
108            let line = interpret_line(idx, line)?;
109            output.push(parse_line(line)?);
110        }
111    }
112    let bytes = output.iter().flat_map(|line| line.bytes.clone()).collect();
113    let program = Program {
114        lines: output,
115        bytes,
116    };
117    Ok(program)
118}
119
120fn parse_line(line: Line) -> Result<ParsedLine, ParseError> {
121    let mut bytes = vec![];
122    if let Some((op, args)) = &line.command {
123        let command = op.to_ascii_uppercase();
124        let mut arguments = vec![];
125        let expects_bytes = ops::expects_bytes(&command);
126        for arg in args {
127            let arg_token = parse_argument(line.num, arg)?;
128            arguments.push(arg_token.to_argument(expects_bytes));
129        }
130        let pattern = arg_list_to_letters(&arguments);
131        bytes.push(get_op_code(line.num, &command, &pattern)?);
132        for arg in &arguments {
133            bytes.extend_from_slice(&arg.to_bytes());
134        }
135        for arg in arguments {
136            bytes.extend_from_slice(&arg.to_offset_bytes());
137        }
138    }
139    Ok(ParsedLine { line, bytes })
140}
141
142pub fn parse_line_from_str(text: &str) -> Result<ParsedLine, ParseError> {
143    let line = interpret_line(0, text)?;
144    parse_line(line)
145}
146
#[cfg(test)]
mod test {
    use super::*;
    use maikor_platform::op_params::{IND_OFFSET_REG, IND_PRE_DEC};
    use maikor_platform::ops::{
        ADD_REG_NUM_BYTE, ADD_REG_NUM_WORD, CMP_REG_NUM_BYTE, INC_REG_BYTE, INC_REG_WORD, JE_ADDR,
        MEM_CPY_ADDR_REG_BYTE,
    };
    use maikor_platform::registers::id;

    /// Single lines keep their text and command, and produce the op code followed
    /// by the argument bytes; an instruction without a size suffix is rejected.
    #[test]
    fn line_test() {
        assert_eq!(
            parse_line_from_str("inc.w bx").unwrap(),
            ParsedLine {
                line: Line {
                    num: 0,
                    original: "inc.w bx".to_string(),
                    label: None,
                    command: Some(("inc.w".to_string(), vec!["bx".to_string()])),
                },
                bytes: vec![INC_REG_WORD, id::BX],
            }
        );
        assert_eq!(
            parse_line_from_str("add.b al, 30").unwrap(),
            ParsedLine {
                line: Line {
                    num: 0,
                    original: "add.b al, 30".to_string(),
                    label: None,
                    command: Some((
                        "add.b".to_string(),
                        vec!["al".to_string(), "30".to_string()]
                    )),
                },
                bytes: vec![ADD_REG_NUM_BYTE, id::AL, 30],
            }
        );

        assert!(parse_line_from_str("inc al").is_err());
    }

    /// A comment-only line is skipped and the remaining lines are assembled in order.
    #[test]
    fn basic_test() {
        let lines = vec!["# test program", "INC.B AL", "CMP.B AL, 1", "JE $50"];
        let output = parse_program(&lines).unwrap();
        assert_eq!(output.lines.len(), 3);
        assert_eq!(
            output.bytes,
            vec![
                INC_REG_BYTE,
                id::AL,
                CMP_REG_NUM_BYTE,
                id::AL,
                1,
                JE_ADDR,
                0,
                50,
            ]
        );
    }

    /// Extra whitespace around instructions, arguments and indirect expressions
    /// does not change the generated bytes.
    #[test]
    fn whitespace_test() {
        let lines = vec![
            "  mcpy $255,- (    bx ) ,  1 ",
            " inc.b   ah      ",
            "  add.w  (   bx +   al    )     ,  124 ",
        ];
        let output = parse_program(&lines).unwrap();
        assert_eq!(output.lines.len(), 3);
        assert_eq!(
            output.bytes,
            vec![
                MEM_CPY_ADDR_REG_BYTE,
                0,
                255,
                id::BX | IND_PRE_DEC,
                1,
                INC_REG_BYTE,
                id::AH,
                ADD_REG_NUM_WORD,
                id::BX | IND_OFFSET_REG,
                0,
                124,
                id::AL
            ]
        );
    }
}
237}