1use crate::errors::CompileError;
2use sbpf_common::opcode::Opcode;
3use std::{ops::Range, str::FromStr as _};
4
/// Binary arithmetic operator appearing in an operand expression.
///
/// `tokenize` produces these for the characters `+`, `-`, `*` and `/`
/// respectively, wrapped in `Token::BinaryOp`.
#[derive(Debug, Clone)]
pub enum Op {
    /// `+`
    Add,
    /// `-`
    Sub,
    /// `*`
    Mul,
    /// `/`
    Div,
}
12
/// A literal numeric operand.
///
/// The two variants carry the same payload type; they differ only in how the
/// value was written in the source and in how arithmetic combines them (any
/// operation involving an `Addr` yields an `Addr`).
#[derive(Debug, Clone, PartialEq)]
pub enum ImmediateValue {
    /// Plain integer, produced by `tokenize` from a decimal literal.
    Int(i64),
    /// Address-like value, produced by `tokenize` from a `0x`-prefixed
    /// hexadecimal literal.
    Addr(i64),
}
18
19impl std::ops::Add for ImmediateValue {
20 type Output = ImmediateValue;
21 fn add(self, other: Self) -> ImmediateValue {
22 match (self, other) {
23 (ImmediateValue::Int(a), ImmediateValue::Int(b)) => ImmediateValue::Int(a + b),
24 (ImmediateValue::Addr(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a + b),
25 (ImmediateValue::Int(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a + b),
26 (ImmediateValue::Addr(a), ImmediateValue::Int(b)) => ImmediateValue::Addr(a + b),
27 }
28 }
29}
30
31impl std::ops::Sub for ImmediateValue {
32 type Output = ImmediateValue;
33 fn sub(self, other: Self) -> ImmediateValue {
34 match (self, other) {
35 (ImmediateValue::Int(a), ImmediateValue::Int(b)) => ImmediateValue::Int(a - b),
36 (ImmediateValue::Addr(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a - b),
37 (ImmediateValue::Int(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a - b),
38 (ImmediateValue::Addr(a), ImmediateValue::Int(b)) => ImmediateValue::Addr(a - b),
39 }
40 }
41}
42
43impl std::ops::Mul for ImmediateValue {
44 type Output = ImmediateValue;
45 fn mul(self, other: Self) -> ImmediateValue {
46 match (self, other) {
47 (ImmediateValue::Int(a), ImmediateValue::Int(b)) => ImmediateValue::Int(a * b),
48 (ImmediateValue::Addr(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a * b),
49 (ImmediateValue::Int(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a * b),
50 (ImmediateValue::Addr(a), ImmediateValue::Int(b)) => ImmediateValue::Addr(a * b),
51 }
52 }
53}
54
55impl std::ops::Div for ImmediateValue {
56 type Output = ImmediateValue;
57 fn div(self, other: Self) -> ImmediateValue {
58 match (self, other) {
59 (ImmediateValue::Int(a), ImmediateValue::Int(b)) => ImmediateValue::Int(a / b),
60 (ImmediateValue::Addr(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a / b),
61 (ImmediateValue::Int(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a / b),
62 (ImmediateValue::Addr(a), ImmediateValue::Int(b)) => ImmediateValue::Addr(a / b),
63 }
64 }
65}
66
67#[derive(Debug, Clone)]
68pub enum Token {
69 Directive(String, Range<usize>),
70 Label(String, Range<usize>),
71 Identifier(String, Range<usize>),
72 Opcode(Opcode, Range<usize>),
73 Register(u8, Range<usize>),
74 ImmediateValue(ImmediateValue, Range<usize>),
75 BinaryOp(Op, Range<usize>),
76 StringLiteral(String, Range<usize>),
77 VectorLiteral(Vec<ImmediateValue>, Range<usize>),
78
79 LeftBracket(Range<usize>),
80 RightBracket(Range<usize>),
81 LeftParen(Range<usize>),
82 RightParen(Range<usize>),
83 Comma(Range<usize>),
84 Colon(Range<usize>),
85
86 Newline(Range<usize>),
87}
88
89pub fn tokenize(source: &str) -> Result<Vec<Token>, Vec<CompileError>> {
90 let mut tokens = Vec::new();
91 let mut errors = Vec::new();
92 let mut byte_offset = 0;
93
94 let mut paren_stack: Vec<Token> = Vec::new();
95
96 for line in source.lines() {
97 if line.is_empty() {
98 byte_offset += 1;
99 continue;
100 }
101 let mut chars = line.char_indices().peekable();
102 while let Some((start_idx, c)) = chars.peek() {
103 let token_start = byte_offset + start_idx;
104 match c {
105 c if c.is_ascii_digit() => {
106 let mut number = String::new();
107 let mut is_addr = false;
108 while let Some((_, c)) = chars.peek() {
109 if c.is_ascii_digit() {
110 number.push(chars.next().unwrap().1);
111 } else if number == "0" && *c == 'x' {
112 chars.next();
113 is_addr = true; number = String::new();
115 } else if is_addr
116 && (*c == 'a'
117 || *c == 'b'
118 || *c == 'c'
119 || *c == 'd'
120 || *c == 'e'
121 || *c == 'f')
122 {
123 number.push(chars.next().unwrap().1);
124 } else {
125 break;
126 }
127 }
128 let span = token_start..token_start + number.len();
129 if is_addr {
130 if let Ok(value) = u64::from_str_radix(&number, 16) {
131 let value = value as i64;
132 tokens.push(Token::ImmediateValue(
133 ImmediateValue::Addr(value),
134 span.clone(),
135 ));
136 } else {
137 errors.push(CompileError::InvalidNumber {
138 number,
139 span: span.clone(),
140 custom_label: None,
141 });
142 }
143 } else if let Ok(value) = number.parse::<i64>() {
144 tokens.push(Token::ImmediateValue(
145 ImmediateValue::Int(value),
146 span.clone(),
147 ));
148 } else {
149 errors.push(CompileError::InvalidNumber {
150 number,
151 span: span.clone(),
152 custom_label: None,
153 });
154 }
155 }
156
157 c if c.is_ascii_alphanumeric() || *c == '_' => {
158 let mut identifier = String::new();
159 while let Some((_, c)) = chars.peek() {
160 if *c == '_' || *c == ':' || *c == '.' || c.is_ascii_alphanumeric() {
161 identifier.push(chars.next().unwrap().1);
162 } else {
163 break;
164 }
165 }
166 let span = token_start..token_start + identifier.len();
167 if identifier.ends_with(':') {
168 let label_name = identifier.trim_end_matches(':').to_string();
169 tokens.push(Token::Label(label_name, span));
170 } else if identifier.starts_with('r')
171 && identifier[1..].chars().all(|c| c.is_ascii_digit())
172 {
173 if let Ok(value) = identifier[1..].parse::<u8>() {
175 tokens.push(Token::Register(value, span.clone()));
176 } else {
177 errors.push(CompileError::InvalidRegister {
178 register: identifier,
179 span: span.clone(),
180 custom_label: None,
181 });
182 }
183 } else if let Ok(opcode) = Opcode::from_str(&identifier) {
184 tokens.push(Token::Opcode(opcode, span));
185 } else {
186 tokens.push(Token::Identifier(identifier, span));
187 }
188 }
189 c if c.is_whitespace() => {
190 chars.next();
191 }
192 '+' => {
193 chars.next();
194 let span = token_start..token_start + 1;
195 tokens.push(Token::BinaryOp(Op::Add, span));
196 }
197 '-' => {
198 chars.next();
199 let span = token_start..token_start + 1;
200 tokens.push(Token::BinaryOp(Op::Sub, span));
201 }
202 '*' => {
203 chars.next();
204 let span = token_start..token_start + 1;
205 tokens.push(Token::BinaryOp(Op::Mul, span));
206 }
207 '.' => {
208 chars.next();
209 let directive: String = chars
210 .by_ref()
211 .take_while(|(_, c)| c.is_ascii_alphanumeric() || *c == '_')
212 .map(|(_, c)| c)
213 .collect();
214 let span = token_start..token_start + directive.len() + 1;
215 tokens.push(Token::Directive(directive, span));
216 }
217 '"' => {
218 chars.next();
219 let mut string_literal = String::new();
220 while let Some((_, c)) = chars.peek() {
221 if *c == '"' {
222 chars.next();
223 let span = token_start..token_start + string_literal.len() + 2;
224 tokens.push(Token::StringLiteral(string_literal, span));
225 break;
226 } else if *c == '\n' {
227 errors.push(CompileError::UnterminatedStringLiteral {
228 span: token_start..token_start + 1,
229 custom_label: None,
230 });
231 }
232 string_literal.push(chars.next().unwrap().1);
233 }
234 }
235 '(' => {
236 chars.next();
237 let span = token_start..token_start + 1;
238 let token = Token::LeftParen(span);
239 paren_stack.push(token.clone());
240 tokens.push(token);
241 }
242 ')' => {
243 chars.next();
244 let span = token_start..token_start + 1;
245 paren_stack.pop();
246 tokens.push(Token::RightParen(span));
247 }
248 '[' => {
249 chars.next();
250 let span = token_start..token_start + 1;
251 tokens.push(Token::LeftBracket(span));
252 }
253 ']' => {
254 chars.next();
255 let span = token_start..token_start + 1;
256 tokens.push(Token::RightBracket(span));
257 }
258 ',' => {
259 chars.next();
260 let span = token_start..token_start + 1;
261 tokens.push(Token::Comma(span));
262 }
263 '#' => {
265 chars.next();
266 break;
267 }
268 '/' => {
269 chars.next();
270 if let Some((_, '/')) = chars.peek() {
271 chars.next();
272 break;
273 } else {
274 chars.next();
275 let span = token_start..token_start + 1;
276 tokens.push(Token::BinaryOp(Op::Div, span));
277 }
278 }
279 _ => {
280 let span = token_start..token_start + 1;
281 errors.push(CompileError::UnexpectedCharacter {
282 character: *c,
283 span,
284 custom_label: None,
285 });
286 chars.next();
287 }
288 }
289 }
290 byte_offset += line.len();
291 byte_offset += 1;
293 }
294
295 while let Some(Token::LeftParen(span)) = paren_stack.pop() {
296 errors.push(CompileError::UnmatchedParen {
297 span,
298 custom_label: None,
299 });
300 }
301
302 if errors.is_empty() {
303 Ok(tokens)
304 } else {
305 Err(errors)
306 }
307}