1use crate::diag::Diagnostic;
6use crate::span::Span;
7
/// The category of a lexed token, together with any payload it carries.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    /// Identifier: an ASCII letter or `_` followed by ASCII letters, digits or `_`.
    Ident(String),
    /// Integer literal.
    Int(i64),
    /// Decimal literal of the form `digits.digits`.
    Float(f64),
    /// String literal contents with escape sequences already processed.
    /// The escapes `\{` and `\}` are stored as U+0001 and U+0002 respectively.
    Str(String),
    /// `@name`; the payload is the name without the leading `@`.
    AtIdent(String),
    /// `#` color literal; the payload is the hex digits without the leading `#`.
    HexColor(String),
    /// `<digits>x<digits>` grid size; the payload is the two numbers in source order.
    Dimension(u32, u32),
    /// `//` line comment; the payload is the trimmed text after the slashes.
    Comment(String),

    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `:`
    Colon,
    /// `;`
    Semi,
    /// `,`
    Comma,
    /// `.`
    Dot,
    /// `..`
    DotDot,
    /// `*`
    Star,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `=`
    Eq,
    /// `->`
    Arrow,
    /// `<->`
    BidiArrow,
    /// `<-`
    BackArrow,
    /// End of a source line.
    Newline,
    /// End of the input; always the final token.
    Eof,
}
46
47impl TokenKind {
48 pub fn describe(&self) -> String {
50 match self {
51 TokenKind::Ident(name) => format!("identifier `{name}`"),
52 TokenKind::Int(v) => format!("number `{v}`"),
53 TokenKind::Float(v) => format!("number `{v}`"),
54 TokenKind::Str(_) => "string literal".into(),
55 TokenKind::AtIdent(name) => format!("theme token `@{name}`"),
56 TokenKind::HexColor(c) => format!("color `#{c}`"),
57 TokenKind::Dimension(a, b) => format!("grid size `{a}x{b}`"),
58 TokenKind::Comment(_) => "comment".into(),
59 TokenKind::LBrace => "`{`".into(),
60 TokenKind::RBrace => "`}`".into(),
61 TokenKind::LParen => "`(`".into(),
62 TokenKind::RParen => "`)`".into(),
63 TokenKind::LBracket => "`[`".into(),
64 TokenKind::RBracket => "`]`".into(),
65 TokenKind::Colon => "`:`".into(),
66 TokenKind::Semi => "`;`".into(),
67 TokenKind::Comma => "`,`".into(),
68 TokenKind::Dot => "`.`".into(),
69 TokenKind::DotDot => "`..`".into(),
70 TokenKind::Star => "`*`".into(),
71 TokenKind::Plus => "`+`".into(),
72 TokenKind::Minus => "`-`".into(),
73 TokenKind::Slash => "`/`".into(),
74 TokenKind::Percent => "`%`".into(),
75 TokenKind::Eq => "`=`".into(),
76 TokenKind::Arrow => "`->`".into(),
77 TokenKind::BidiArrow => "`<->`".into(),
78 TokenKind::BackArrow => "`<-`".into(),
79 TokenKind::Newline => "end of line".into(),
80 TokenKind::Eof => "end of file".into(),
81 }
82 }
83}
84
/// A single lexed token: what it is and where it was found.
#[derive(Debug, Clone)]
pub struct Token {
    /// The token's category and payload.
    pub kind: TokenKind,
    /// Source location of the token; the lexer builds it from the byte offset
    /// where the token starts and the offset just past its last byte.
    pub span: Span,
}
90
/// Everything produced by one call to [`lex`].
pub struct LexOutput {
    /// Tokens in source order; the last one is always [`TokenKind::Eof`].
    pub tokens: Vec<Token>,
    /// Problems found while lexing, in the order they were encountered.
    pub diagnostics: Vec<Diagnostic>,
}
95
96pub fn lex(text: &str) -> LexOutput {
97 Lexer {
98 text,
99 bytes: text.as_bytes(),
100 pos: 0,
101 tokens: Vec::new(),
102 diags: Vec::new(),
103 }
104 .run()
105}
106
/// Cursor-based lexer state; created by [`lex`] and consumed by `run`.
struct Lexer<'a> {
    /// The input as text, used for slicing out token contents.
    text: &'a str,
    /// The same input viewed as bytes, used for cheap single-byte lookahead.
    bytes: &'a [u8],
    /// Byte offset of the next unread byte.
    pos: usize,
    /// Tokens produced so far.
    tokens: Vec<Token>,
    /// Diagnostics produced so far.
    diags: Vec<Diagnostic>,
}
114
115impl<'a> Lexer<'a> {
116 fn run(mut self) -> LexOutput {
117 while self.pos < self.bytes.len() {
118 let start = self.pos;
119 let b = self.bytes[self.pos];
120 match b {
121 b' ' | b'\t' | b'\r' => self.pos += 1,
122 b'\n' => {
123 self.pos += 1;
126 self.push(TokenKind::Newline, start);
127 }
128 b'/' if self.peek(1) == Some(b'/') => {
129 let mut end = self.pos;
130 while end < self.bytes.len() && self.bytes[end] != b'\n' {
131 end += 1;
132 }
133 let content = self.text[self.pos + 2..end].trim().to_string();
134 self.pos = end;
135 self.push(TokenKind::Comment(content), start);
136 }
137 b'"' => self.lex_string(start),
138 b'0'..=b'9' => self.lex_number(start),
139 b'A'..=b'Z' | b'a'..=b'z' | b'_' => self.lex_ident(start),
140 b'@' => {
141 self.pos += 1;
142 if self.cur_is_ident_start() {
143 let name = self.take_ident_text();
144 self.push(TokenKind::AtIdent(name), start);
145 } else {
146 self.error_char(
147 start,
148 "`@` must be followed by a theme token name, like `@accent`",
149 );
150 }
151 }
152 b'#' => {
153 self.pos += 1;
154 let hex_start = self.pos;
155 while self.pos < self.bytes.len() && self.bytes[self.pos].is_ascii_hexdigit() {
156 self.pos += 1;
157 }
158 let hex = &self.text[hex_start..self.pos];
159 if hex.len() == 3 || hex.len() == 6 || hex.len() == 8 {
160 self.push(TokenKind::HexColor(hex.to_string()), start);
161 } else {
162 self.diags.push(
163 Diagnostic::error("E0105", format!("invalid color literal `#{hex}`"))
164 .with_label(
165 Span::new(start, self.pos),
166 "expected 3, 6, or 8 hex digits",
167 )
168 .with_help("write colors as `#rgb`, `#rrggbb`, or `#rrggbbaa`"),
169 );
170 }
171 }
172 b'{' => self.single(TokenKind::LBrace),
173 b'}' => self.single(TokenKind::RBrace),
174 b'(' => self.single(TokenKind::LParen),
175 b')' => self.single(TokenKind::RParen),
176 b'[' => self.single(TokenKind::LBracket),
177 b']' => self.single(TokenKind::RBracket),
178 b':' => self.single(TokenKind::Colon),
179 b';' => self.single(TokenKind::Semi),
180 b',' => self.single(TokenKind::Comma),
181 b'*' => self.single(TokenKind::Star),
182 b'+' => self.single(TokenKind::Plus),
183 b'%' => self.single(TokenKind::Percent),
184 b'=' => self.single(TokenKind::Eq),
185 b'/' => self.single(TokenKind::Slash),
186 b'.' => {
187 if self.peek(1) == Some(b'.') {
188 self.pos += 2;
189 self.push(TokenKind::DotDot, start);
190 } else {
191 self.single(TokenKind::Dot);
192 }
193 }
194 b'-' => {
195 if self.peek(1) == Some(b'>') {
196 self.pos += 2;
197 self.push(TokenKind::Arrow, start);
198 } else {
199 self.single(TokenKind::Minus);
200 }
201 }
202 b'<' => {
203 if self.peek(1) == Some(b'-') && self.peek(2) == Some(b'>') {
204 self.pos += 3;
205 self.push(TokenKind::BidiArrow, start);
206 } else if self.peek(1) == Some(b'-') {
207 self.pos += 2;
208 self.push(TokenKind::BackArrow, start);
209 } else {
210 self.error_char(
211 start,
212 "`<` is only used in the `<->` and `<-` edge arrows",
213 );
214 }
215 }
216 _ => {
217 let ch_len = self.text[self.pos..]
218 .chars()
219 .next()
220 .map(|c| c.len_utf8())
221 .unwrap_or(1);
222 self.pos += ch_len;
223 let ch = &self.text[start..self.pos];
224 self.diags.push(
225 Diagnostic::error("E0101", format!("unexpected character `{ch}`"))
226 .with_label(
227 Span::new(start, self.pos),
228 "not valid drawlang syntax here",
229 ),
230 );
231 }
232 }
233 }
234 let end = self.bytes.len();
235 if !matches!(
236 self.tokens.last().map(|t| &t.kind),
237 Some(TokenKind::Newline) | None
238 ) {
239 self.tokens.push(Token {
240 kind: TokenKind::Newline,
241 span: Span::new(end, end),
242 });
243 }
244 self.tokens.push(Token {
245 kind: TokenKind::Eof,
246 span: Span::new(end, end),
247 });
248 LexOutput {
249 tokens: self.tokens,
250 diagnostics: self.diags,
251 }
252 }
253
254 fn peek(&self, ahead: usize) -> Option<u8> {
255 self.bytes.get(self.pos + ahead).copied()
256 }
257
258 fn push(&mut self, kind: TokenKind, start: usize) {
259 self.tokens.push(Token {
260 kind,
261 span: Span::new(start, self.pos),
262 });
263 }
264
265 fn single(&mut self, kind: TokenKind) {
266 let start = self.pos;
267 self.pos += 1;
268 self.push(kind, start);
269 }
270
271 fn error_char(&mut self, start: usize, help: &str) {
272 let ch = &self.text[start..self.pos.max(start + 1).min(self.text.len())];
273 self.diags.push(
274 Diagnostic::error("E0101", format!("unexpected character `{ch}`"))
275 .with_label(Span::new(start, self.pos.max(start + 1)), "not valid here")
276 .with_help(help),
277 );
278 }
279
280 fn cur_is_ident_start(&self) -> bool {
281 matches!(
282 self.bytes.get(self.pos),
283 Some(b'A'..=b'Z' | b'a'..=b'z' | b'_')
284 )
285 }
286
287 fn take_ident_text(&mut self) -> String {
288 let start = self.pos;
289 while matches!(
290 self.bytes.get(self.pos),
291 Some(b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'_')
292 ) {
293 self.pos += 1;
294 }
295 self.text[start..self.pos].to_string()
296 }
297
298 fn lex_ident(&mut self, start: usize) {
299 let name = self.take_ident_text();
300 self.push(TokenKind::Ident(name), start);
301 }
302
303 fn lex_number(&mut self, start: usize) {
304 while matches!(self.bytes.get(self.pos), Some(b'0'..=b'9')) {
305 self.pos += 1;
306 }
307 if self.bytes.get(self.pos) == Some(&b'x')
309 && matches!(self.bytes.get(self.pos + 1), Some(b'0'..=b'9'))
310 {
311 let cols_str = &self.text[start..self.pos];
312 self.pos += 1; let rows_start = self.pos;
314 while matches!(self.bytes.get(self.pos), Some(b'0'..=b'9')) {
315 self.pos += 1;
316 }
317 let rows_str = &self.text[rows_start..self.pos];
318 if self.cur_is_ident_start() {
320 self.take_ident_text();
321 self.diags.push(
322 Diagnostic::error("E0105", "malformed grid size")
323 .with_label(Span::new(start, self.pos), "expected something like `2x4`"),
324 );
325 return;
326 }
327 let a: u32 = cols_str.parse().unwrap_or(0);
328 let b: u32 = rows_str.parse().unwrap_or(0);
329 self.push(TokenKind::Dimension(a, b), start);
330 return;
331 }
332 if self.bytes.get(self.pos) == Some(&b'.')
334 && matches!(self.bytes.get(self.pos + 1), Some(b'0'..=b'9'))
335 {
336 self.pos += 1;
337 while matches!(self.bytes.get(self.pos), Some(b'0'..=b'9')) {
338 self.pos += 1;
339 }
340 let v: f64 = self.text[start..self.pos].parse().unwrap();
341 self.push(TokenKind::Float(v), start);
342 return;
343 }
344 if self.cur_is_ident_start() {
346 let unit_start = self.pos;
347 let unit = self.take_ident_text();
348 self.diags.push(
349 Diagnostic::error(
350 "E0105",
351 format!("numbers take no unit suffix; found `{unit}`"),
352 )
353 .with_label(Span::new(unit_start, self.pos), "remove this suffix")
354 .with_help("drawlang lengths are always in pixels; write the bare number"),
355 );
356 return;
357 }
358 let v: i64 = self.text[start..self.pos].parse().unwrap_or(0);
359 self.push(TokenKind::Int(v), start);
360 }
361
362 fn lex_string(&mut self, start: usize) {
363 self.pos += 1; let mut value = String::new();
365 let mut brace_depth = 0usize;
366 loop {
367 match self.bytes.get(self.pos) {
368 None | Some(b'\n') => {
369 self.diags.push(
370 Diagnostic::error("E0102", "unterminated string literal")
371 .with_label(
372 Span::new(start, self.pos),
373 "string starts here and never closes",
374 )
375 .with_help("add a closing `\"` before the end of the line"),
376 );
377 self.push(TokenKind::Str(value), start);
378 return;
379 }
380 Some(b'"') if brace_depth == 0 => {
381 self.pos += 1;
382 break;
383 }
384 Some(b'\\') => {
385 self.pos += 1;
386 match self.bytes.get(self.pos) {
387 Some(b'n') => value.push('\n'),
388 Some(b't') => value.push('\t'),
389 Some(b'"') => value.push('"'),
390 Some(b'\\') => value.push('\\'),
391 Some(b'{') => value.push('\u{1}'),
394 Some(b'}') => value.push('\u{2}'),
395 other => {
396 let ch = other.map(|&b| b as char).unwrap_or('?');
397 self.diags.push(
398 Diagnostic::error("E0102", format!("unknown escape `\\{ch}`"))
399 .with_label(
400 Span::new(self.pos - 1, self.pos + 1),
401 "not a valid escape sequence",
402 )
403 .with_help(r#"valid escapes are \" \\ \n \t \{ \}"#),
404 );
405 }
406 }
407 self.pos += 1;
408 }
409 Some(&b'{') => {
410 brace_depth += 1;
411 value.push('{');
412 self.pos += 1;
413 }
414 Some(&b'}') => {
415 brace_depth = brace_depth.saturating_sub(1);
416 value.push('}');
417 self.pos += 1;
418 }
419 Some(_) => {
420 let ch = self.text[self.pos..].chars().next().unwrap();
421 value.push(ch);
422 self.pos += ch.len_utf8();
423 }
424 }
425 }
426 if brace_depth > 0 {
427 self.diags.push(
428 Diagnostic::error("E0104", "unbalanced `{` in string interpolation")
429 .with_label(
430 Span::new(start, self.pos),
431 "this string has an unclosed `{`",
432 )
433 .with_help(
434 r#"close the interpolation (`"GPU {i}"`) or escape the brace as `\{`"#,
435 ),
436 );
437 }
438 self.push(TokenKind::Str(value), start);
439 }
440}