1use crate::bug;
2use crate::errors::CompileError;
3use crate::opcode::Opcode;
4use std::ops::Range;
5
/// Binary arithmetic operator recognised by the lexer.
///
/// The enum carries no payload, so it is `Copy` and cheaply comparable;
/// callers can match on it or compare it without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Op {
    /// `+`
    Add,
    /// `-`
    Sub,
    /// `*`
    Mul,
    /// `/`
    Div,
}
13
/// A literal numeric operand.
///
/// Both variants wrap a single `i64`, so the type is `Copy`, totally
/// comparable and hashable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ImmediateValue {
    /// Plain integer value.
    Int(i64),
    /// Address value.
    Addr(i64),
}
19
20impl std::ops::Add for ImmediateValue {
21 type Output = ImmediateValue;
22 fn add(self, other: Self) -> ImmediateValue {
23 match (self, other) {
24 (ImmediateValue::Int(a), ImmediateValue::Int(b)) => ImmediateValue::Int(a + b),
25 (ImmediateValue::Addr(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a + b),
26 (ImmediateValue::Int(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a + b),
27 (ImmediateValue::Addr(a), ImmediateValue::Int(b)) => ImmediateValue::Addr(a + b),
28 }
29 }
30}
31
32impl std::ops::Sub for ImmediateValue {
33 type Output = ImmediateValue;
34 fn sub(self, other: Self) -> ImmediateValue {
35 match (self, other) {
36 (ImmediateValue::Int(a), ImmediateValue::Int(b)) => ImmediateValue::Int(a - b),
37 (ImmediateValue::Addr(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a - b),
38 (ImmediateValue::Int(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a - b),
39 (ImmediateValue::Addr(a), ImmediateValue::Int(b)) => ImmediateValue::Addr(a - b),
40 }
41 }
42}
43
44impl std::ops::Mul for ImmediateValue {
45 type Output = ImmediateValue;
46 fn mul(self, other: Self) -> ImmediateValue {
47 match (self, other) {
48 (ImmediateValue::Int(a), ImmediateValue::Int(b)) => ImmediateValue::Int(a * b),
49 (ImmediateValue::Addr(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a * b),
50 (ImmediateValue::Int(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a * b),
51 (ImmediateValue::Addr(a), ImmediateValue::Int(b)) => ImmediateValue::Addr(a * b),
52 }
53 }
54}
55
56impl std::ops::Div for ImmediateValue {
57 type Output = ImmediateValue;
58 fn div(self, other: Self) -> ImmediateValue {
59 match (self, other) {
60 (ImmediateValue::Int(a), ImmediateValue::Int(b)) => ImmediateValue::Int(a / b),
61 (ImmediateValue::Addr(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a / b),
62 (ImmediateValue::Int(a), ImmediateValue::Addr(b)) => ImmediateValue::Addr(a / b),
63 (ImmediateValue::Addr(a), ImmediateValue::Int(b)) => ImmediateValue::Addr(a / b),
64 }
65 }
66}
67
68#[derive(Debug, Clone)]
69pub enum Token {
70 Directive(String, Range<usize>),
71 Label(String, Range<usize>),
72 Identifier(String, Range<usize>),
73 Opcode(Opcode, Range<usize>),
74 Register(u8, Range<usize>),
75 ImmediateValue(ImmediateValue, Range<usize>),
76 BinaryOp(Op, Range<usize>),
77 StringLiteral(String, Range<usize>),
78 VectorLiteral(Vec<ImmediateValue>, Range<usize>),
79
80 LeftBracket(Range<usize>),
81 RightBracket(Range<usize>),
82 LeftParen(Range<usize>),
83 RightParen(Range<usize>),
84 Comma(Range<usize>),
85 Colon(Range<usize>),
86
87 Newline(Range<usize>),
88}
89
90pub fn tokenize(source: &str) -> Result<Vec<Token>, Vec<CompileError>> {
91 let mut tokens = Vec::new();
92 let mut errors = Vec::new();
93 let mut byte_offset = 0;
94
95 let mut paren_stack : Vec<Token> = Vec::new();
96
97 for line in source.lines() {
98 if line.is_empty() {
99 byte_offset += 1;
100 continue;
101 }
102 let mut chars = line.char_indices().peekable();
103 while let Some((start_idx, c)) = chars.peek() {
104 let token_start = byte_offset + start_idx;
105 match c {
106 c if c.is_ascii_digit() => {
107 let mut number = String::new();
108 let mut is_addr = false;
109 while let Some((_, c)) = chars.peek() {
110 if c.is_digit(10) {
111 number.push(chars.next().unwrap().1);
112 } else if number == "0" && *c == 'x' {
113 chars.next();
114 is_addr = true; number = String::new();
115 } else if is_addr && (*c == 'a' || *c == 'b' || *c == 'c' || *c == 'd' || *c == 'e' || *c == 'f') {
116 number.push(chars.next().unwrap().1);
117 } else {
118 break;
119 }
120 }
121 let span = token_start..token_start + number.len();
122 if is_addr {
123 if let Ok(value) = u64::from_str_radix(&number, 16) {
124 let value = value as i64;
125 tokens.push(Token::ImmediateValue(ImmediateValue::Addr(value), span.clone()));
126 } else {
127 errors.push(CompileError::InvalidNumber { number, span: span.clone(), custom_label: None });
128 }
129 } else {
130 if let Ok(value) = number.parse::<i64>() {
131 tokens.push(Token::ImmediateValue(ImmediateValue::Int(value), span.clone()));
132 } else {
133 errors.push(CompileError::InvalidNumber { number, span: span.clone(), custom_label: None });
134 }
135 }
136 }
137
138 c if c.is_ascii_alphanumeric() || *c == '_' => {
139 let mut identifier = String::new();
140 while let Some((_, c)) = chars.peek() {
141 if *c == '_' || *c == ':' || *c == '.' || c.is_ascii_alphanumeric() {
142 identifier.push(chars.next().unwrap().1);
143 } else {
144 break;
145 }
146 }
147 let span = token_start..token_start + identifier.len();
148 if identifier.ends_with(':') {
149 let label_name = identifier.trim_end_matches(':').to_string();
150 tokens.push(Token::Label(label_name, span));
151 } else if identifier.starts_with('r') && identifier[1..].chars().all(|c| c.is_ascii_digit()) {
152 if let Ok(value) = identifier[1..].parse::<u8>() {
154 tokens.push(Token::Register(value, span.clone()));
155 } else {
156 errors.push(CompileError::InvalidRegister { register: identifier, span: span.clone(), custom_label: None });
157 }
158 } else if let Ok(opcode) = Opcode::from_str(&identifier) {
159 tokens.push(Token::Opcode(opcode, span));
160 } else {
161 tokens.push(Token::Identifier(identifier, span));
162 }
163 }
164 c if c.is_whitespace() => {
165 chars.next();
166 }
167 '+' => {
168 chars.next();
169 let span = token_start..token_start + 1;
170 tokens.push(Token::BinaryOp(Op::Add, span));
171 }
172 '-' => {
173 chars.next();
174 let span = token_start..token_start + 1;
175 tokens.push(Token::BinaryOp(Op::Sub, span));
176 }
177 '*' => {
178 chars.next();
179 let span = token_start..token_start + 1;
180 tokens.push(Token::BinaryOp(Op::Mul, span));
181 }
182 '.' => {
183 chars.next();
184 let directive: String = chars.by_ref()
185 .take_while(|(_, c)| c.is_ascii_alphanumeric() || *c == '_')
186 .map(|(_, c)| c)
187 .collect();
188 let span = token_start..token_start + directive.len() + 1;
189 tokens.push(Token::Directive(directive, span));
190 }
191 '"' => {
192 chars.next();
193 let mut string_literal = String::new();
194 while let Some((_, c)) = chars.peek() {
195 if *c == '"' {
196 chars.next();
197 let span = token_start..token_start + string_literal.len() + 2;
198 tokens.push(Token::StringLiteral(string_literal, span));
199 break;
200 } else if *c == '\n' {
201 errors.push(CompileError::UnterminatedStringLiteral { span: token_start..token_start + 1, custom_label: None });
202 }
203 string_literal.push(chars.next().unwrap().1);
204 }
205 }
206 '(' => {
207 chars.next();
208 let span = token_start..token_start + 1;
209 let token = Token::LeftParen(span);
210 paren_stack.push(token.clone());
211 tokens.push(token);
212 }
213 ')' => {
214 chars.next();
215 let span = token_start..token_start + 1;
216 paren_stack.pop();
217 tokens.push(Token::RightParen(span));
218 }
219 '[' => {
220 chars.next();
221 let span = token_start..token_start + 1;
222 tokens.push(Token::LeftBracket(span));
223 }
224 ']' => {
225 chars.next();
226 let span = token_start..token_start + 1;
227 tokens.push(Token::RightBracket(span));
228 }
229 ',' => {
230 chars.next();
231 let span = token_start..token_start + 1;
232 tokens.push(Token::Comma(span));
233 }
234 '#' => {
236 chars.next();
237 break;
238 }
239 '/' => {
240 chars.next();
241 if let Some((_, '/')) = chars.peek() {
242 chars.next();
243 break;
244 } else {
245 chars.next();
246 let span = token_start..token_start + 1;
247 tokens.push(Token::BinaryOp(Op::Div, span));
248 }
249 }
250 _ => {
251 let span = token_start..token_start + 1;
252 errors.push(CompileError::UnexpectedCharacter { character: *c, span, custom_label: None });
253 chars.next();
254 }
255 }
256 }
257 byte_offset += line.len();
258 byte_offset += 1;
260 }
261
262 while !paren_stack.is_empty() {
263 let Token::LeftParen(span) = paren_stack.pop().unwrap() else {
264 bug!("this stack should only contain left paren tokens")
265 };
266 errors.push(CompileError::UnmatchedParen { span, custom_label: None });
267 }
268
269 if errors.is_empty() {
270 Ok(tokens)
271 } else {
272 Err(errors)
273 }
274}