y86_lib/assembler/
parser.rs

1use lazy_static::lazy_static;
2use num_derive::FromPrimitive;
3use num_traits::FromPrimitive;
4use std::collections::HashMap;
5use std::error::Error;
6use std::fmt::{self, Display, Formatter};
7
8lazy_static! {
9    static ref INSTRUCTION_CODE: HashMap<&'static str, u8> = vec![
10        ("halt", (ICode::IHALT as u8) << 4),
11        ("nop", (ICode::INOP as u8) << 4),
12        ("rrmovq", (ICode::IRRMVXX as u8) << 4),
13        ("cmovle", (ICode::IRRMVXX as u8) << 4 | 1),
14        ("cmovl", (ICode::IRRMVXX as u8) << 4 | 2),
15        ("cmove", (ICode::IRRMVXX as u8) << 4 | 3),
16        ("cmovne", (ICode::IRRMVXX as u8) << 4 | 4),
17        ("cmovge", (ICode::IRRMVXX as u8) << 4 | 5),
18        ("cmovg", (ICode::IRRMVXX as u8) << 4 | 6),
19        ("rmmovq", (ICode::IRMMOVQ as u8) << 4),
20        ("mrmovq", (ICode::IMRMOVQ as u8) << 4),
21        ("irmovq", (ICode::IIRMOVQ as u8) << 4),
22        ("addq", (ICode::IOPQ as u8) << 4),
23        ("subq", (ICode::IOPQ as u8) << 4 | 1),
24        ("andq", (ICode::IOPQ as u8) << 4 | 2),
25        ("xorq", (ICode::IOPQ as u8) << 4 | 3),
26        ("mulq", (ICode::IOPQ as u8) << 4 | 4),
27        ("divq", (ICode::IOPQ as u8) << 4 | 5),
28        ("modq", (ICode::IOPQ as u8) << 4 | 6),
29        ("jmp", (ICode::IJXX as u8) << 4),
30        ("jle", (ICode::IJXX as u8) << 4 | 1),
31        ("jl", (ICode::IJXX as u8) << 4 | 2),
32        ("je", (ICode::IJXX as u8) << 4 | 3),
33        ("jne", (ICode::IJXX as u8) << 4 | 4),
34        ("jge", (ICode::IJXX as u8) << 4 | 5),
35        ("jg", (ICode::IJXX as u8) << 4 | 6),
36        ("call", (ICode::ICALL as u8) << 4),
37        ("ret", (ICode::IRET as u8) << 4),
38        ("pushq", (ICode::IPUSHQ as u8) << 4),
39        ("popq", (ICode::IPOPQ as u8) << 4)
40    ]
41    .into_iter()
42    .collect();
43    static ref REGISTERS: HashMap<&'static str, u8> = vec![
44        ("%rax", Register::RRAX as u8),
45        ("%rcx", Register::RRCX as u8),
46        ("%rdx", Register::RRDX as u8),
47        ("%rbx", Register::RRBX as u8),
48        ("%rsp", Register::RRSP as u8),
49        ("%rbp", Register::RRBP as u8),
50        ("%rsi", Register::RRSI as u8),
51        ("%rdi", Register::RRDI as u8),
52        ("%r8", Register::RR8 as u8),
53        ("%r9", Register::RR9 as u8),
54        ("%r10", Register::RR10 as u8),
55        ("%r11", Register::RR11 as u8),
56        ("%r12", Register::RR12 as u8),
57        ("%r13", Register::RR13 as u8),
58        ("%r14", Register::RR14 as u8),
59    ]
60    .into_iter()
61    .collect();
62}
63
64pub fn parse(line: &str) -> Result<Vec<u8>, Box<dyn Error>> {
65    if line.contains(".quad") {
66        parse_quad(line)
67    } else {
68        let mut split_line = line.split(' ');
69        let instr = Parser::new(&split_line.next().unwrap().to_string())?;
70        instr.parse(line)
71    }
72}
73
74pub fn get_icode_from_string(string: &str) -> Result<ICode, Box<dyn Error>> {
75    let b: u8 = match INSTRUCTION_CODE.get(string) {
76        Some(&val) => val,
77        None => return Err(Box::new(InvalidInstructionError)),
78    };
79    get_icode_from_byte(b)
80}
81
82pub fn parse_quad(line: &str) -> Result<Vec<u8>, Box<dyn Error>> {
83    let mut split = line.split(".quad");
84    split.next();
85    let val = split.next().unwrap();
86    let parsed = get_immediate(val.trim())?;
87    let mut res = vec![];
88    push_le(&mut res, parsed);
89    Ok(res)
90}
91
92#[derive(Copy, Clone, FromPrimitive, PartialEq)]
93pub enum ICode {
94    IHALT = 0x0,
95    INOP = 0x1,
96    IRRMVXX = 0x2,
97    IIRMOVQ = 0x3,
98    IRMMOVQ = 0x4,
99    IMRMOVQ = 0x5,
100    IOPQ = 0x6,
101    IJXX = 0x7,
102    ICALL = 0x8,
103    IRET = 0x9,
104    IPUSHQ = 0xA,
105    IPOPQ = 0xB,
106    IINVALID = 0x10,
107    ITOOSHORT = 0x11,
108}
109#[derive(Copy, Clone, FromPrimitive)]
110pub enum Register {
111    RRAX = 0x0,
112    RRCX = 0x1,
113    RRDX = 0x2,
114    RRBX = 0x3,
115    RRSP = 0x4,
116    RRBP = 0x5,
117    RRSI = 0x6,
118    RRDI = 0x7,
119    RR8 = 0x8,
120    RR9 = 0x9,
121    RR10 = 0xA,
122    RR11 = 0xB,
123    RR12 = 0xC,
124    RR13 = 0xD,
125    RR14 = 0xE,
126    RNONE = 0xF,
127}
128
/// Encoder for a single instruction whose mnemonic has already been resolved.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Parser {
    /// First byte of the encoding: instruction code in the high nibble,
    /// variant selector in the low nibble.
    instruction_type: u8,
}
132
/// Error reported when a line cannot be recognised as a valid instruction.
#[derive(Debug)]
struct InvalidInstructionError;

impl Display for InvalidInstructionError {
    fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result {
        formatter.write_str("Invalid instruction")
    }
}

impl std::error::Error for InvalidInstructionError {}
143
/// Error reported when a register name is not in the register table.
#[derive(Debug)]
struct InvalidRegisterError;

impl Display for InvalidRegisterError {
    fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result {
        formatter.write_str("Invalid Register")
    }
}

impl std::error::Error for InvalidRegisterError {}
154
155pub fn get_icode_from_byte(b: u8) -> Result<ICode, Box<dyn std::error::Error>> {
156    match FromPrimitive::from_u8(b >> 4) {
157        Some(val) => Ok(val),
158        None => Err(Box::new(InvalidInstructionError)),
159    }
160}
161
162impl Parser {
163    pub fn new(instr: &str) -> Result<Self, Box<dyn std::error::Error>> {
164        let instruction_type = match INSTRUCTION_CODE.get(&instr[..]) {
165            Some(&val) => val,
166            None => return Err(Box::new(InvalidInstructionError)),
167        };
168        Ok(Parser { instruction_type })
169    }
170
171    pub fn parse(&self, line: &str) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
172        let mut res = vec![self.instruction_type];
173        match get_icode_from_byte(self.instruction_type)? {
174            ICode::IIRMOVQ => parse_irmovq(line, &mut res)?,
175            ICode::IRRMVXX | ICode::IOPQ => parse_rr_opq(line, &mut res)?,
176            ICode::IMRMOVQ => parse_mrmovq(line, &mut res)?,
177            ICode::IRMMOVQ => parse_rmmovq(line, &mut res)?,
178            ICode::IJXX | ICode::ICALL => parse_jxx_call(line, &mut res)?,
179            ICode::IRET | ICode::IHALT | ICode::INOP => {}
180            ICode::IPUSHQ | ICode::IPOPQ => parse_push_pop(line, &mut res)?,
181            _ => return Err(Box::new(InvalidInstructionError)),
182        };
183        Ok(res)
184    }
185}
186
/// Packs two 4-bit values into one byte: `first` in the high nibble and
/// `second` in the low nibble. Bits above the low four of either argument
/// are discarded.
fn form_byte(first: u8, second: u8) -> u8 {
    let high = (first & 0x0F) << 4;
    let low = second & 0x0F;
    high | low
}
190
/// Parses the textual form of a numeric operand into a 64-bit value.
///
/// This is a thin wrapper: `value` is forwarded unchanged to
/// `crate::number_parser::parse_num` and that function's result is returned
/// as-is, including its error.
// NOTE(review): the accepted syntax (prefixes, radix, sign) is defined by
// `parse_num`, which is not visible from this file — confirm there.
fn get_immediate(value: &str) -> Result<u64, Box<dyn std::error::Error>> {
    crate::number_parser::parse_num(value)
}
194
195fn get_register(value: &str) -> Result<u8, Box<dyn std::error::Error>> {
196    match REGISTERS.get(value.trim()) {
197        Some(&val) => Ok(val),
198        None => Err(Box::new(InvalidRegisterError)),
199    }
200}
201
/// Appends the eight bytes of `val` to `vec`, least significant byte first.
fn push_le(vec: &mut Vec<u8>, val: u64) {
    vec.extend_from_slice(&val.to_le_bytes());
}
207
208fn parse_irmovq(line: &str, res: &mut Vec<u8>) -> Result<(), Box<dyn std::error::Error>> {
209    let mut split = line.split(',');
210    let instr_val = split.next().unwrap();
211    let mut instr_val = instr_val.split(' ');
212    instr_val.next();
213    let mut first = instr_val.next();
214    while first.is_some() && first.unwrap() == "" {
215        first = instr_val.next();
216    }
217    let val_c = get_immediate(first.unwrap().trim())?;
218    let reg = get_register(split.next().unwrap().trim())?;
219    let b: u8 = form_byte(0x0F, reg);
220    res.push(b);
221    push_le(res, val_c);
222    Ok(())
223}
224
225fn parse_rr_opq(line: &str, res: &mut Vec<u8>) -> Result<(), Box<dyn std::error::Error>> {
226    let mut split = line.split(',');
227    let instr_reg = split.next().unwrap().trim();
228    let mut reg_split = instr_reg.split(' ');
229    reg_split.next();
230    let mut first = reg_split.next();
231    while first.is_some() && first.unwrap() == "" {
232        first = reg_split.next();
233    }
234    let reg_a = get_register(first.unwrap().trim())?;
235    let reg_b = get_register(split.next().unwrap().trim())?;
236    res.push(form_byte(reg_a, reg_b));
237    Ok(())
238}
239fn parse_mrmovq(line: &str, res: &mut Vec<u8>) -> Result<(), Box<dyn std::error::Error>> {
240    let mut split = line.split(',');
241    let first = split.next().unwrap().trim();
242    let mut imm_reg_split = first.split(' ');
243    imm_reg_split.next();
244    let mut first = imm_reg_split.next();
245    while first.is_some() && first.unwrap() == "" {
246        first = imm_reg_split.next();
247    }
248    let mem_brackets = first.unwrap().trim();
249    let mut num_reg_b = mem_brackets.split('(');
250    let val_c = get_immediate(num_reg_b.next().unwrap().trim())?;
251    let mut reg_only = num_reg_b.next().unwrap().split(')');
252    let reg_b = get_register(reg_only.next().unwrap().trim())?;
253    let reg_a = get_register(split.next().unwrap().trim())?;
254    res.push(form_byte(reg_a, reg_b));
255    push_le(res, val_c);
256    Ok(())
257}
258fn parse_rmmovq(line: &str, res: &mut Vec<u8>) -> Result<(), Box<dyn std::error::Error>> {
259    let mut split = line.split(',');
260    let first = split.next().unwrap().trim();
261    let mut instr_reg_a = first.split(' ');
262    instr_reg_a.next();
263    let mut first = instr_reg_a.next();
264    while first.is_some() && first.unwrap() == "" {
265        first = instr_reg_a.next();
266    }
267    let reg_a = get_register(first.unwrap().trim())?;
268    let mem_brackets = split.next().unwrap().trim();
269    let mut num_reg_b = mem_brackets.split('(');
270    let val_c = get_immediate(num_reg_b.next().unwrap().trim().trim())?;
271    let mut reg_only = num_reg_b.next().unwrap().trim().split(')');
272    let reg_b = get_register(reg_only.next().unwrap().trim())?;
273    res.push(form_byte(reg_a, reg_b));
274    push_le(res, val_c);
275    Ok(())
276}
277fn parse_jxx_call(line: &str, res: &mut Vec<u8>) -> Result<(), Box<dyn std::error::Error>> {
278    let mut split = line.trim().split(' ');
279    split.next();
280    let mut first = split.next();
281    while first.is_some() && first.unwrap() == "" {
282        first = split.next();
283    }
284    let val_c = get_immediate(first.unwrap().trim())?;
285    push_le(res, val_c);
286    Ok(())
287}
288
289fn parse_push_pop(line: &str, res: &mut Vec<u8>) -> Result<(), Box<dyn std::error::Error>> {
290    let mut split = line.trim().split(' ');
291    split.next();
292    let mut first = split.next();
293    while first.is_some() && first.unwrap() == "" {
294        first = split.next();
295    }
296    let reg_a = get_register(first.unwrap().trim())?;
297    res.push(form_byte(reg_a, 0x0F));
298    Ok(())
299}