sbpf_assembler/
lexer.rs

1use crate::errors::CompileError;
2use sbpf_common::opcode::Opcode;
3use std::{ops::Range, str::FromStr as _};
4
/// Binary arithmetic operator carried by [`Token::BinaryOp`].
#[derive(Debug, Clone)]
pub enum Op {
    /// Emitted by `tokenize` for a `+` character.
    Add,
    /// Emitted by `tokenize` for a `-` character.
    Sub,
    /// Emitted by `tokenize` for a `*` character.
    Mul,
    /// Emitted by `tokenize` for a `/` character that does not start a `//` comment.
    Div,
}
12
/// Numeric literal value, tagged by how it was written in the source.
///
/// The arithmetic operator impls on this type return `Int` only when both
/// operands are `Int`; any combination involving an `Addr` returns `Addr`.
#[derive(Debug, Clone, PartialEq)]
pub enum ImmediateValue {
    /// Value of a decimal literal (parsed as `i64` by `tokenize`).
    Int(i64),
    /// Value of a `0x`-prefixed hexadecimal literal. `tokenize` parses it as
    /// `u64` and stores the same bit pattern as `i64`.
    Addr(i64),
}
18
19impl std::ops::Add for ImmediateValue {
20    type Output = ImmediateValue;
21    fn add(self, other: Self) -> ImmediateValue {
22        match (self, other) {
23            (ImmediateValue::Int(a), ImmediateValue::Int(b)) => ImmediateValue::Int(a + b),
24            (ImmediateValue::Addr(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a + b),
25            (ImmediateValue::Int(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a + b),
26            (ImmediateValue::Addr(a), ImmediateValue::Int(b)) => ImmediateValue::Addr(a + b),
27        }
28    }
29}
30
31impl std::ops::Sub for ImmediateValue {
32    type Output = ImmediateValue;
33    fn sub(self, other: Self) -> ImmediateValue {
34        match (self, other) {
35            (ImmediateValue::Int(a), ImmediateValue::Int(b)) => ImmediateValue::Int(a - b),
36            (ImmediateValue::Addr(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a - b),
37            (ImmediateValue::Int(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a - b),
38            (ImmediateValue::Addr(a), ImmediateValue::Int(b)) => ImmediateValue::Addr(a - b),
39        }
40    }
41}
42
43impl std::ops::Mul for ImmediateValue {
44    type Output = ImmediateValue;
45    fn mul(self, other: Self) -> ImmediateValue {
46        match (self, other) {
47            (ImmediateValue::Int(a), ImmediateValue::Int(b)) => ImmediateValue::Int(a * b),
48            (ImmediateValue::Addr(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a * b),
49            (ImmediateValue::Int(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a * b),
50            (ImmediateValue::Addr(a), ImmediateValue::Int(b)) => ImmediateValue::Addr(a * b),
51        }
52    }
53}
54
55impl std::ops::Div for ImmediateValue {
56    type Output = ImmediateValue;
57    fn div(self, other: Self) -> ImmediateValue {
58        match (self, other) {
59            (ImmediateValue::Int(a), ImmediateValue::Int(b)) => ImmediateValue::Int(a / b),
60            (ImmediateValue::Addr(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a / b),
61            (ImmediateValue::Int(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a / b),
62            (ImmediateValue::Addr(a), ImmediateValue::Int(b)) => ImmediateValue::Addr(a / b),
63        }
64    }
65}
66
/// A lexical token together with the `Range<usize>` span it was read from.
///
/// `tokenize` in this file emits every variant except `VectorLiteral`,
/// `Colon` and `Newline`; those are presumably produced or reserved for use
/// elsewhere — TODO confirm.
#[derive(Debug, Clone)]
pub enum Token {
    /// Name that followed a leading `.` (the dot itself is not stored).
    Directive(String, Range<usize>),
    /// Word that ended with `:`; the trailing colons are stripped from the name.
    Label(String, Range<usize>),
    /// Word that is not a label, register, or opcode.
    Identifier(String, Range<usize>),
    /// Word accepted by `Opcode::from_str`.
    Opcode(Opcode, Range<usize>),
    /// Register number taken from a word of the form `r<digits>`.
    Register(u8, Range<usize>),
    /// Decimal (`Int`) or `0x` hexadecimal (`Addr`) literal.
    ImmediateValue(ImmediateValue, Range<usize>),
    /// One of the arithmetic operators `+`, `-`, `*`, `/`.
    BinaryOp(Op, Range<usize>),
    /// Text found between a pair of double quotes (quotes not stored).
    StringLiteral(String, Range<usize>),
    /// List of immediate values; not constructed by `tokenize` in this file.
    VectorLiteral(Vec<ImmediateValue>, Range<usize>),

    /// `[`
    LeftBracket(Range<usize>),
    /// `]`
    RightBracket(Range<usize>),
    /// `(`
    LeftParen(Range<usize>),
    /// `)`
    RightParen(Range<usize>),
    /// `,`
    Comma(Range<usize>),
    /// Not constructed by `tokenize` in this file (a `:` is only consumed as
    /// part of a label word).
    Colon(Range<usize>),

    /// Not constructed by `tokenize` in this file.
    Newline(Range<usize>),
}
88
89pub fn tokenize(source: &str) -> Result<Vec<Token>, Vec<CompileError>> {
90    let mut tokens = Vec::new();
91    let mut errors = Vec::new();
92    let mut byte_offset = 0;
93
94    let mut paren_stack: Vec<Token> = Vec::new();
95
96    for line in source.lines() {
97        if line.is_empty() {
98            byte_offset += 1;
99            continue;
100        }
101        let mut chars = line.char_indices().peekable();
102        while let Some((start_idx, c)) = chars.peek() {
103            let token_start = byte_offset + start_idx;
104            match c {
105                c if c.is_ascii_digit() => {
106                    let mut number = String::new();
107                    let mut is_addr = false;
108                    while let Some((_, c)) = chars.peek() {
109                        if c.is_ascii_digit() {
110                            number.push(chars.next().unwrap().1);
111                        } else if number == "0" && *c == 'x' {
112                            chars.next();
113                            is_addr = true; /*  */
114                            number = String::new();
115                        } else if is_addr
116                            && (*c == 'a'
117                                || *c == 'b'
118                                || *c == 'c'
119                                || *c == 'd'
120                                || *c == 'e'
121                                || *c == 'f')
122                        {
123                            number.push(chars.next().unwrap().1);
124                        } else {
125                            break;
126                        }
127                    }
128                    let span = token_start..token_start + number.len();
129                    if is_addr {
130                        if let Ok(value) = u64::from_str_radix(&number, 16) {
131                            let value = value as i64;
132                            tokens.push(Token::ImmediateValue(
133                                ImmediateValue::Addr(value),
134                                span.clone(),
135                            ));
136                        } else {
137                            errors.push(CompileError::InvalidNumber {
138                                number,
139                                span: span.clone(),
140                                custom_label: None,
141                            });
142                        }
143                    } else if let Ok(value) = number.parse::<i64>() {
144                        tokens.push(Token::ImmediateValue(
145                            ImmediateValue::Int(value),
146                            span.clone(),
147                        ));
148                    } else {
149                        errors.push(CompileError::InvalidNumber {
150                            number,
151                            span: span.clone(),
152                            custom_label: None,
153                        });
154                    }
155                }
156
157                c if c.is_ascii_alphanumeric() || *c == '_' => {
158                    let mut identifier = String::new();
159                    while let Some((_, c)) = chars.peek() {
160                        if *c == '_' || *c == ':' || *c == '.' || c.is_ascii_alphanumeric() {
161                            identifier.push(chars.next().unwrap().1);
162                        } else {
163                            break;
164                        }
165                    }
166                    let span = token_start..token_start + identifier.len();
167                    if identifier.ends_with(':') {
168                        let label_name = identifier.trim_end_matches(':').to_string();
169                        tokens.push(Token::Label(label_name, span));
170                    } else if identifier.starts_with('r')
171                        && identifier[1..].chars().all(|c| c.is_ascii_digit())
172                    {
173                        // TODO: label name can be "r"
174                        if let Ok(value) = identifier[1..].parse::<u8>() {
175                            tokens.push(Token::Register(value, span.clone()));
176                        } else {
177                            errors.push(CompileError::InvalidRegister {
178                                register: identifier,
179                                span: span.clone(),
180                                custom_label: None,
181                            });
182                        }
183                    } else if let Ok(opcode) = Opcode::from_str(&identifier) {
184                        tokens.push(Token::Opcode(opcode, span));
185                    } else {
186                        tokens.push(Token::Identifier(identifier, span));
187                    }
188                }
189                c if c.is_whitespace() => {
190                    chars.next();
191                }
192                '+' => {
193                    chars.next();
194                    let span = token_start..token_start + 1;
195                    tokens.push(Token::BinaryOp(Op::Add, span));
196                }
197                '-' => {
198                    chars.next();
199                    let span = token_start..token_start + 1;
200                    tokens.push(Token::BinaryOp(Op::Sub, span));
201                }
202                '*' => {
203                    chars.next();
204                    let span = token_start..token_start + 1;
205                    tokens.push(Token::BinaryOp(Op::Mul, span));
206                }
207                '.' => {
208                    chars.next();
209                    let directive: String = chars
210                        .by_ref()
211                        .take_while(|(_, c)| c.is_ascii_alphanumeric() || *c == '_')
212                        .map(|(_, c)| c)
213                        .collect();
214                    let span = token_start..token_start + directive.len() + 1;
215                    tokens.push(Token::Directive(directive, span));
216                }
217                '"' => {
218                    chars.next();
219                    let mut string_literal = String::new();
220                    while let Some((_, c)) = chars.peek() {
221                        if *c == '"' {
222                            chars.next();
223                            let span = token_start..token_start + string_literal.len() + 2;
224                            tokens.push(Token::StringLiteral(string_literal, span));
225                            break;
226                        } else if *c == '\n' {
227                            errors.push(CompileError::UnterminatedStringLiteral {
228                                span: token_start..token_start + 1,
229                                custom_label: None,
230                            });
231                        }
232                        string_literal.push(chars.next().unwrap().1);
233                    }
234                }
235                '(' => {
236                    chars.next();
237                    let span = token_start..token_start + 1;
238                    let token = Token::LeftParen(span);
239                    paren_stack.push(token.clone());
240                    tokens.push(token);
241                }
242                ')' => {
243                    chars.next();
244                    let span = token_start..token_start + 1;
245                    paren_stack.pop();
246                    tokens.push(Token::RightParen(span));
247                }
248                '[' => {
249                    chars.next();
250                    let span = token_start..token_start + 1;
251                    tokens.push(Token::LeftBracket(span));
252                }
253                ']' => {
254                    chars.next();
255                    let span = token_start..token_start + 1;
256                    tokens.push(Token::RightBracket(span));
257                }
258                ',' => {
259                    chars.next();
260                    let span = token_start..token_start + 1;
261                    tokens.push(Token::Comma(span));
262                }
263                // handle comments
264                '#' => {
265                    chars.next();
266                    break;
267                }
268                '/' => {
269                    chars.next();
270                    if let Some((_, '/')) = chars.peek() {
271                        chars.next();
272                        break;
273                    } else {
274                        chars.next();
275                        let span = token_start..token_start + 1;
276                        tokens.push(Token::BinaryOp(Op::Div, span));
277                    }
278                }
279                _ => {
280                    let span = token_start..token_start + 1;
281                    errors.push(CompileError::UnexpectedCharacter {
282                        character: *c,
283                        span,
284                        custom_label: None,
285                    });
286                    chars.next();
287                }
288            }
289        }
290        byte_offset += line.len();
291        // tokens.push(Token::Newline(byte_offset..byte_offset + 1));
292        byte_offset += 1;
293    }
294
295    while let Some(Token::LeftParen(span)) = paren_stack.pop() {
296        errors.push(CompileError::UnmatchedParen {
297            span,
298            custom_label: None,
299        });
300    }
301
302    if errors.is_empty() {
303        Ok(tokens)
304    } else {
305        Err(errors)
306    }
307}