1use crate::diagnostic::{LuxError, Span};
8
/// The kind of a lexical token, together with any payload it carries.
///
/// Literal and identifier variants own their decoded value; every other
/// variant is a fixed keyword, delimiter or operator.
#[derive(Debug, Clone, PartialEq)]
pub enum Tok {
    // ---- literals and names ----
    /// Integer literal, parsed as `i64`.
    Int(i64),
    /// Floating-point literal, parsed as `f64`.
    Float(f64),
    /// String literal contents with escape sequences already resolved.
    Str(String),
    /// The keyword `true`.
    True,
    /// The keyword `false`.
    False,
    /// Any name that is not a reserved keyword.
    Ident(String),

    // ---- keywords ----
    /// The keyword `let`.
    Let,
    /// The keyword `var`.
    Var,
    /// The keyword `if`.
    If,
    /// The keyword `else`.
    Else,
    /// The keyword `while`.
    While,
    /// The keyword `for`.
    For,
    /// The keyword `in`.
    In,
    /// The keyword `func`.
    Func,
    /// The keyword `return`.
    Return,
    /// The keyword `struct`.
    Struct,
    /// The keyword `enum`.
    Enum,
    /// The keyword `match`.
    Match,

    // ---- delimiters and punctuation ----
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `.`
    Dot,
    /// `..`
    DotDot,
    /// `->`
    Arrow,
    /// `=>`
    FatArrow,

    // ---- operators ----
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `=`
    Eq,
    /// `+=`
    PlusEq,
    /// `-=`
    MinusEq,
    /// `==`
    EqEq,
    /// `!=`
    NotEq,
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `<=`
    Le,
    /// `>=`
    Ge,
    /// `&&`
    AndAnd,
    /// `||`
    OrOr,
    /// `!`
    Bang,

    /// End-of-input marker appended after the last real token.
    Eof,
}
65
66#[derive(Debug, Clone)]
67pub struct Token {
68 pub tok: Tok,
69 pub span: Span,
70}
71
72pub fn lex(source: &str) -> Result<Vec<Token>, LuxError> {
73 let bytes = source.as_bytes();
74 let n = bytes.len();
75 let mut tokens = Vec::new();
76 let mut i = 0;
77
78 while i < n {
79 let c = bytes[i];
80
81 if c == b' ' || c == b'\t' || c == b'\r' || c == b'\n' {
83 i += 1;
84 continue;
85 }
86
87 if c == b'/' && i + 1 < n && bytes[i + 1] == b'/' {
89 while i < n && bytes[i] != b'\n' {
90 i += 1;
91 }
92 continue;
93 }
94
95 let start = i;
96
97 if c.is_ascii_digit() {
99 while i < n && bytes[i].is_ascii_digit() {
100 i += 1;
101 }
102 if i < n && bytes[i] == b'.' && !(i + 1 < n && bytes[i + 1] == b'.') {
105 if i + 1 < n && bytes[i + 1].is_ascii_digit() {
106 i += 1; while i < n && bytes[i].is_ascii_digit() {
108 i += 1;
109 }
110 let text = &source[start..i];
111 let val: f64 = text
112 .parse()
113 .map_err(|_| LuxError::new("invalid float literal", Span::new(start, i)))?;
114 tokens.push(Token {
115 tok: Tok::Float(val),
116 span: Span::new(start, i),
117 });
118 continue;
119 } else {
120 return Err(LuxError::new(
121 "a float needs at least one digit after the decimal point",
122 Span::new(start, i + 1),
123 )
124 .with_note("write 3.0, not 3."));
125 }
126 }
127 let text = &source[start..i];
128 let val: i64 = text
129 .parse()
130 .map_err(|_| LuxError::new("integer literal is too large", Span::new(start, i)))?;
131 tokens.push(Token {
132 tok: Tok::Int(val),
133 span: Span::new(start, i),
134 });
135 continue;
136 }
137
138 if c.is_ascii_alphabetic() || c == b'_' {
140 while i < n && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
141 i += 1;
142 }
143 let text = &source[start..i];
144 let tok = match text {
145 "let" => Tok::Let,
146 "var" => Tok::Var,
147 "if" => Tok::If,
148 "else" => Tok::Else,
149 "while" => Tok::While,
150 "for" => Tok::For,
151 "in" => Tok::In,
152 "func" => Tok::Func,
153 "return" => Tok::Return,
154 "struct" => Tok::Struct,
155 "enum" => Tok::Enum,
156 "match" => Tok::Match,
157 "true" => Tok::True,
158 "false" => Tok::False,
159 _ => Tok::Ident(text.to_string()),
160 };
161 tokens.push(Token {
162 tok,
163 span: Span::new(start, i),
164 });
165 continue;
166 }
167
168 if c == b'"' {
170 i += 1; let mut s = String::new();
172 loop {
173 if i >= n {
174 return Err(LuxError::new("unterminated string", Span::new(start, i))
175 .with_note("add a closing \" to the end of the string"));
176 }
177 let ch = bytes[i];
178 if ch == b'"' {
179 i += 1; break;
181 }
182 if ch == b'\\' {
183 if i + 1 >= n {
184 return Err(LuxError::new("unterminated string", Span::new(start, i)));
185 }
186 let mapped = match bytes[i + 1] {
187 b'n' => '\n',
188 b't' => '\t',
189 b'"' => '"',
190 b'\\' => '\\',
191 other => {
192 return Err(LuxError::new(
193 format!("unknown escape sequence \\{}", other as char),
194 Span::new(i, i + 2),
195 )
196 .with_note("lux understands \\n, \\t, \\\" and \\\\"));
197 }
198 };
199 s.push(mapped);
200 i += 2;
201 continue;
202 }
203 let rest = &source[i..];
205 let ch_char = rest.chars().next().unwrap();
206 s.push(ch_char);
207 i += ch_char.len_utf8();
208 }
209 tokens.push(Token {
210 tok: Tok::Str(s),
211 span: Span::new(start, i),
212 });
213 continue;
214 }
215
216 let c1 = if i + 1 < n { bytes[i + 1] } else { 0 };
218 let two = match (c, c1) {
219 (b'=', b'=') => Some(Tok::EqEq),
220 (b'!', b'=') => Some(Tok::NotEq),
221 (b'<', b'=') => Some(Tok::Le),
222 (b'>', b'=') => Some(Tok::Ge),
223 (b'&', b'&') => Some(Tok::AndAnd),
224 (b'|', b'|') => Some(Tok::OrOr),
225 (b'+', b'=') => Some(Tok::PlusEq),
226 (b'-', b'=') => Some(Tok::MinusEq),
227 (b'-', b'>') => Some(Tok::Arrow),
228 (b'=', b'>') => Some(Tok::FatArrow),
229 (b'.', b'.') => Some(Tok::DotDot),
230 _ => None,
231 };
232 if let Some(t) = two {
233 tokens.push(Token {
234 tok: t,
235 span: Span::new(start, i + 2),
236 });
237 i += 2;
238 continue;
239 }
240
241 let single = match c {
243 b'(' => Tok::LParen,
244 b')' => Tok::RParen,
245 b'{' => Tok::LBrace,
246 b'}' => Tok::RBrace,
247 b'[' => Tok::LBracket,
248 b']' => Tok::RBracket,
249 b':' => Tok::Colon,
250 b',' => Tok::Comma,
251 b'.' => Tok::Dot,
252 b'+' => Tok::Plus,
253 b'-' => Tok::Minus,
254 b'*' => Tok::Star,
255 b'/' => Tok::Slash,
256 b'%' => Tok::Percent,
257 b'=' => Tok::Eq,
258 b'<' => Tok::Lt,
259 b'>' => Tok::Gt,
260 b'!' => Tok::Bang,
261 other => {
262 return Err(LuxError::new(
263 format!("unexpected character '{}'", other as char),
264 Span::new(start, start + 1),
265 ));
266 }
267 };
268 tokens.push(Token {
269 tok: single,
270 span: Span::new(start, start + 1),
271 });
272 i += 1;
273 }
274
275 tokens.push(Token {
276 tok: Tok::Eof,
277 span: Span::new(n, n),
278 });
279 Ok(tokens)
280}