sbpf_assembler/
lexer.rs

1use crate::bug;
2use crate::errors::CompileError;
3use crate::opcode::Opcode;
4use std::ops::Range;
5
/// A binary arithmetic operator appearing between operands in the source.
///
/// The enum is fieldless, so it is `Copy` and comparable with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Op {
    /// Addition, written `+`.
    Add,
    /// Subtraction, written `-`.
    Sub,
    /// Multiplication, written `*`.
    Mul,
    /// Division, written `/`.
    Div,
}
13
/// A literal operand value.
///
/// Both variants carry an `i64`. The lexer in this file produces `Int` for
/// decimal literals and `Addr` for `0x`-prefixed hexadecimal literals, where
/// the hexadecimal value is parsed as a `u64` and reinterpreted as `i64`.
///
/// Arithmetic between two values follows one rule: `Int op Int` yields `Int`,
/// and any combination involving an `Addr` yields `Addr`. All operations use
/// two's-complement wrapping arithmetic so that results are the same in debug
/// and release builds and so that address arithmetic crossing the `i64` sign
/// boundary does not panic.
#[derive(Debug, Clone, PartialEq)]
pub enum ImmediateValue {
    /// A plain integer.
    Int(i64),
    /// An address-like value (stored as the `i64` view of its bit pattern).
    Addr(i64),
}

/// Applies `op` to the payloads of `lhs` and `rhs` and tags the result:
/// `Int` only when both inputs are `Int`, otherwise `Addr`.
fn combine_immediates(
    lhs: ImmediateValue,
    rhs: ImmediateValue,
    op: fn(i64, i64) -> i64,
) -> ImmediateValue {
    use ImmediateValue::{Addr, Int};
    match (lhs, rhs) {
        (Int(a), Int(b)) => Int(op(a, b)),
        // Every remaining pairing has at least one address operand.
        (Int(a) | Addr(a), Int(b) | Addr(b)) => Addr(op(a, b)),
    }
}

impl std::ops::Add for ImmediateValue {
    type Output = ImmediateValue;

    /// Wrapping addition of the two payloads.
    fn add(self, other: Self) -> ImmediateValue {
        combine_immediates(self, other, i64::wrapping_add)
    }
}

impl std::ops::Sub for ImmediateValue {
    type Output = ImmediateValue;

    /// Wrapping subtraction of `other` from `self`.
    fn sub(self, other: Self) -> ImmediateValue {
        combine_immediates(self, other, i64::wrapping_sub)
    }
}

impl std::ops::Mul for ImmediateValue {
    type Output = ImmediateValue;

    /// Wrapping multiplication of the two payloads.
    fn mul(self, other: Self) -> ImmediateValue {
        combine_immediates(self, other, i64::wrapping_mul)
    }
}

impl std::ops::Div for ImmediateValue {
    type Output = ImmediateValue;

    /// Wrapping signed division of `self` by `other`.
    ///
    /// # Panics
    ///
    /// Panics if the payload of `other` is zero. Callers that evaluate
    /// user-written expressions should reject a zero divisor beforehand.
    fn div(self, other: Self) -> ImmediateValue {
        combine_immediates(self, other, i64::wrapping_div)
    }
}
67
68#[derive(Debug, Clone)]
69pub enum Token {
70    Directive(String, Range<usize>),
71    Label(String, Range<usize>),
72    Identifier(String, Range<usize>),
73    Opcode(Opcode, Range<usize>),
74    Register(u8, Range<usize>),
75    ImmediateValue(ImmediateValue, Range<usize>),
76    BinaryOp(Op, Range<usize>),
77    StringLiteral(String, Range<usize>),
78    VectorLiteral(Vec<ImmediateValue>, Range<usize>),
79
80    LeftBracket(Range<usize>),
81    RightBracket(Range<usize>),
82    LeftParen(Range<usize>),
83    RightParen(Range<usize>),
84    Comma(Range<usize>),
85    Colon(Range<usize>),
86
87    Newline(Range<usize>),
88}
89
90pub fn tokenize(source: &str) -> Result<Vec<Token>, Vec<CompileError>> {
91    let mut tokens = Vec::new();
92    let mut errors = Vec::new();
93    let mut byte_offset = 0;
94
95    let mut paren_stack : Vec<Token> = Vec::new();
96
97    for line in source.lines() {
98        if line.is_empty() {
99            byte_offset += 1;
100            continue;
101        }
102        let mut chars = line.char_indices().peekable();
103        while let Some((start_idx, c)) = chars.peek() {
104            let token_start = byte_offset + start_idx;
105            match c {
106                c if c.is_ascii_digit() => {
107                    let mut number = String::new();
108                    let mut is_addr = false;
109                    while let Some((_, c)) = chars.peek() {
110                        if c.is_digit(10) {
111                            number.push(chars.next().unwrap().1);
112                        } else if number == "0" && *c == 'x' {
113                            chars.next();
114                            is_addr = true; /*  */ number = String::new();
115                        } else if is_addr && (*c == 'a' || *c == 'b' || *c == 'c' || *c == 'd' || *c == 'e' || *c == 'f') {
116                            number.push(chars.next().unwrap().1);
117                        } else {
118                            break;
119                        }
120                    }
121                    let span = token_start..token_start + number.len();
122                    if is_addr {
123                        if let Ok(value) = u64::from_str_radix(&number, 16) {
124                            let value = value as i64;
125                            tokens.push(Token::ImmediateValue(ImmediateValue::Addr(value), span.clone()));
126                        } else {
127                            errors.push(CompileError::InvalidNumber { number, span: span.clone(), custom_label: None });
128                        }
129                    } else {
130                        if let Ok(value) = number.parse::<i64>() {
131                            tokens.push(Token::ImmediateValue(ImmediateValue::Int(value), span.clone()));
132                        } else {
133                            errors.push(CompileError::InvalidNumber { number, span: span.clone(), custom_label: None });
134                        }
135                    }      
136                }
137
138                c if c.is_ascii_alphanumeric() || *c == '_' => {
139                    let mut identifier = String::new();
140                    while let Some((_, c)) = chars.peek() {
141                        if *c == '_' || *c == ':' || *c == '.' || c.is_ascii_alphanumeric() { 
142                            identifier.push(chars.next().unwrap().1);
143                        } else {
144                            break;
145                        }
146                    }
147                    let span = token_start..token_start + identifier.len();
148                    if identifier.ends_with(':') {
149                        let label_name = identifier.trim_end_matches(':').to_string();
150                        tokens.push(Token::Label(label_name, span));
151                    } else if identifier.starts_with('r') && identifier[1..].chars().all(|c| c.is_ascii_digit()) {
152                        // TODO: label name can be "r"
153                        if let Ok(value) = identifier[1..].parse::<u8>() {
154                            tokens.push(Token::Register(value, span.clone()));
155                        } else {
156                            errors.push(CompileError::InvalidRegister { register: identifier, span: span.clone(), custom_label: None });
157                        }
158                    } else if let Ok(opcode) = Opcode::from_str(&identifier) {
159                        tokens.push(Token::Opcode(opcode, span));
160                    } else {
161                        tokens.push(Token::Identifier(identifier, span));
162                    }
163                }
164                c if c.is_whitespace() => {
165                    chars.next();
166                }
167                '+' => {
168                    chars.next();
169                    let span = token_start..token_start + 1;
170                    tokens.push(Token::BinaryOp(Op::Add, span));
171                }
172                '-' => {
173                    chars.next();
174                    let span = token_start..token_start + 1;
175                    tokens.push(Token::BinaryOp(Op::Sub, span));
176                }
177                '*' => {
178                    chars.next();
179                    let span = token_start..token_start + 1;
180                    tokens.push(Token::BinaryOp(Op::Mul, span));
181                }
182                '.' => {
183                    chars.next();
184                    let directive: String = chars.by_ref()
185                        .take_while(|(_, c)| c.is_ascii_alphanumeric() || *c == '_')
186                        .map(|(_, c)| c)
187                        .collect();
188                    let span = token_start..token_start + directive.len() + 1;
189                    tokens.push(Token::Directive(directive, span));
190                }
191                '"' => {
192                    chars.next();
193                    let mut string_literal = String::new();
194                    while let Some((_, c)) = chars.peek() {
195                        if *c == '"' {
196                            chars.next();
197                            let span = token_start..token_start + string_literal.len() + 2;
198                            tokens.push(Token::StringLiteral(string_literal, span));
199                            break;
200                        } else if *c == '\n' {
201                            errors.push(CompileError::UnterminatedStringLiteral { span: token_start..token_start + 1, custom_label: None });
202                        }
203                        string_literal.push(chars.next().unwrap().1);
204                    }
205                }
206                '(' => {
207                    chars.next();
208                    let span = token_start..token_start + 1;
209                    let token = Token::LeftParen(span);
210                    paren_stack.push(token.clone());
211                    tokens.push(token);
212                }
213                ')' => {
214                    chars.next();
215                    let span = token_start..token_start + 1;
216                    paren_stack.pop();
217                    tokens.push(Token::RightParen(span));
218                }
219                '[' => {
220                    chars.next();
221                    let span = token_start..token_start + 1;
222                    tokens.push(Token::LeftBracket(span));
223                }
224                ']' => {
225                    chars.next();
226                    let span = token_start..token_start + 1;
227                    tokens.push(Token::RightBracket(span));
228                }
229                ',' => {
230                    chars.next();
231                    let span = token_start..token_start + 1;
232                    tokens.push(Token::Comma(span));
233                }
234                // handle comments
235                '#' => {
236                    chars.next();
237                    break;
238                }
239                '/' => {
240                    chars.next();
241                    if let Some((_, '/')) = chars.peek() {
242                        chars.next();
243                        break;
244                    } else {
245                        chars.next();
246                        let span = token_start..token_start + 1;
247                        tokens.push(Token::BinaryOp(Op::Div, span));
248                    }
249                }
250                _ => {
251                    let span = token_start..token_start + 1;
252                    errors.push(CompileError::UnexpectedCharacter { character: *c, span, custom_label: None });
253                    chars.next();
254                }
255            }
256        }
257        byte_offset += line.len();
258        // tokens.push(Token::Newline(byte_offset..byte_offset + 1));
259        byte_offset += 1;
260    }
261
262    while !paren_stack.is_empty() {
263        let Token::LeftParen(span) = paren_stack.pop().unwrap() else {
264            bug!("this stack should only contain left paren tokens")
265        };
266        errors.push(CompileError::UnmatchedParen { span, custom_label: None });
267    }
268    
269    if errors.is_empty() {
270        Ok(tokens)
271    } else {
272        Err(errors)
273    }
274}