// bubbles/compiler/lexer.rs
//! Logos-based lexer that tokenises `.bub` source and expression strings.

use logos::Logos;
5/// A lexical token produced by the lexer.
6#[derive(Logos, Debug, Clone, PartialEq)]
7#[logos(skip r"[ \t\r\f]+")] // skip horizontal whitespace; newlines are significant in the parser
8pub enum Token {
9 // ── literals ──────────────────────────────────────────────────────────────
10 /// Floating-point or integer literal.
11 #[regex(r"[0-9]+(\.[0-9]+)?", |lex| lex.slice().parse::<f64>().ok())]
12 Number(f64),
13
14 /// Double-quoted string literal.
15 #[regex(r#""([^"\\]|\\.)*""#, |lex| {
16 let s = lex.slice();
17 Some(s[1..s.len()-1].replace("\\\"", "\"").replace("\\\\", "\\").replace("\\n", "\n"))
18 })]
19 Str(String),
20
21 // ── identifiers / keywords ─────────────────────────────────────────────────
22 /// Variable beginning with `$`.
23 #[regex(r"\$[A-Za-z_][A-Za-z0-9_]*", |lex| lex.slice().to_owned())]
24 Var(String),
25
26 /// Plain identifier or keyword.
27 #[regex(r"[A-Za-z_][A-Za-z0-9_]*", |lex| lex.slice().to_owned())]
28 Ident(String),
29
30 // ── delimiters ─────────────────────────────────────────────────────────────
31 /// `(` – opens a parenthesised sub-expression or argument list.
32 #[token("(")]
33 LParen,
34 /// `)` – closes a parenthesised sub-expression or argument list.
35 #[token(")")]
36 RParen,
37 /// `,` – argument separator.
38 #[token(",")]
39 Comma,
40 /// `<<` – opens a command/statement block.
41 #[token("<<")]
42 CmdOpen,
43 /// `>>` – closes a command/statement block.
44 #[token(">>")]
45 CmdClose,
46 /// `{` – opens an inline expression.
47 #[token("{")]
48 BraceOpen,
49 /// `}` – closes an inline expression.
50 #[token("}")]
51 BraceClose,
52
53 // ── arithmetic ─────────────────────────────────────────────────────────────
54 /// `+`
55 #[token("+")]
56 Plus,
57 /// `-`
58 #[token("-")]
59 Minus,
60 /// `*`
61 #[token("*")]
62 Star,
63 /// `/`
64 #[token("/")]
65 Slash,
66 /// `%`
67 #[token("%")]
68 Percent,
69
70 // ── comparison (order matters: `>=` before `>`) ───────────────────────────
71 /// `>=`
72 #[token(">=")]
73 Gte,
74 /// `<=`
75 #[token("<=")]
76 Lte,
77 /// `>`
78 #[token(">")]
79 Gt,
80 /// `<`
81 #[token("<")]
82 Lt,
83 /// `==`
84 #[token("==")]
85 EqEq,
86 /// `!=`
87 #[token("!=")]
88 Neq,
89
90 // ── logical ────────────────────────────────────────────────────────────────
91 /// `&&`
92 #[token("&&")]
93 AndAnd,
94 /// `||`
95 #[token("||")]
96 OrOr,
97 /// `!`
98 #[token("!")]
99 Bang,
100
101 // ── assignment / misc ──────────────────────────────────────────────────────
102 /// `=` (used in `<<set $x = …>>`)
103 #[token("=")]
104 Eq,
105 /// `:`
106 #[token(":")]
107 Colon,
108 /// `->`
109 #[token("->")]
110 Arrow,
111 /// `=>`
112 #[token("=>")]
113 FatArrow,
114 /// `---` body-start delimiter.
115 #[token("---")]
116 BodyStart,
117 /// `===` node-end delimiter.
118 #[token("===")]
119 NodeEnd,
120 /// `#` tag prefix.
121 #[token("#")]
122 Hash,
123 /// Newline.
124 #[token("\n")]
125 Newline,
126}
127
/// A token paired with the byte range it occupies in the lexed input.
pub type Spanned = (Token, std::ops::Range<usize>);

131/// Lexes `input` into a [`Vec`] of spanned tokens, returning an error on
132/// any character that does not match a known token.
133///
134/// # Errors
135///
136/// Returns [`crate::error::DialogueError::Parse`] with `file` / `line` context
137/// when an unrecognised character is encountered, so the caller receives a
138/// precise pointer into the source rather than a confusing downstream failure.
139pub fn tokenise(input: &str, file: &str, line: usize) -> crate::error::Result<Vec<Spanned>> {
140 let mut tokens = Vec::new();
141 for (result, span) in Token::lexer(input).spanned() {
142 if let Ok(tok) = result {
143 tokens.push((tok, span));
144 } else {
145 let ch = input[span].chars().next().unwrap_or('?');
146 return Err(crate::error::DialogueError::Parse {
147 file: file.to_owned(),
148 line,
149 message: format!(
150 "unexpected character `{ch}` in expression; \
151 did you mean `$` for a variable?"
152 ),
153 });
154 }
155 }
156 Ok(tokens)
157}
158
// Unit tests live in a sibling file so this module stays focused on lexing.
#[cfg(test)]
#[path = "lexer_tests.rs"]
mod tests;