1use alloc::vec::Vec;
4
/// One component of a string literal that may contain escape sequences.
///
/// `S` is the type of plain string slices and
/// `T` is the type of terms interpolated into the string.
#[derive(Debug)]
pub enum StrPart<S, T> {
    /// Run of characters that contains neither an escape sequence nor a quote.
    Str(S),
    /// Interpolated term, written `\(...)` inside the string.
    Term(T),
    /// Single character produced by an escape sequence such as `\n` or `\u0041`.
    Char(char),
}
21
22#[derive(Debug)]
28pub struct Token<S>(pub S, pub Tok<S>);
29
30#[derive(Debug)]
35pub enum Tok<S> {
36 Word,
38 Var,
40 Fmt,
42 Num,
44 Str(Vec<StrPart<S, Token<S>>>),
46 Sym,
48 Block(Vec<Token<S>>),
50}
51
/// What the lexer expected to find at a position where lexing failed.
///
/// Each variant is recorded together with the remaining input (see `Error`).
#[non_exhaustive]
#[derive(Debug, Clone)]
pub enum Expect<S> {
    /// At least one ASCII digit (after a decimal point or an exponent marker).
    Digit,
    /// An identifier starting with an ASCII letter or `_`.
    Ident,
    /// The delimiter closing the given opening delimiter.
    Delim(S),
    /// A valid character after the backslash inside a string.
    Escape,
    /// Four hexadecimal digits forming a valid character after `\u`.
    Unicode,
    /// A token; recorded when input is left over that starts no token.
    Token,
}
71
72impl Expect<&str> {
73 pub fn as_str(&self) -> &'static str {
75 match self {
76 Self::Digit => "digit",
77 Self::Ident => "identifier",
78 Self::Delim("(") => "closing parenthesis",
79 Self::Delim("[") => "closing bracket",
80 Self::Delim("{") => "closing brace",
81 Self::Delim("\"") => "closing quote",
82 Self::Delim(_) => panic!(),
83 Self::Escape => "string escape sequence",
84 Self::Unicode => "4-digit hexadecimal UTF-8 code point",
85 Self::Token => "token",
86 }
87 }
88}
89
90pub type Error<S> = (Expect<S>, S);
92
93pub struct Lexer<S> {
95 i: S,
96 e: Vec<Error<S>>,
97}
98
99impl<'a> Lexer<&'a str> {
100 #[must_use]
102 pub fn new(i: &'a str) -> Self {
103 let e = Vec::new();
104 Self { i, e }
105 }
106
107 pub fn lex(mut self) -> Result<Vec<Token<&'a str>>, Vec<Error<&'a str>>> {
109 let tokens = self.tokens();
110 self.space();
111 if !self.i.is_empty() {
112 self.e.push((Expect::Token, self.i));
113 }
114
115 if self.e.is_empty() {
116 Ok(tokens)
117 } else {
118 Err(self.e)
119 }
120 }
121
122 fn next(&mut self) -> Option<char> {
123 let mut chars = self.i.chars();
124 let c = chars.next()?;
125 self.i = chars.as_str();
126 Some(c)
127 }
128
129 fn take(&mut self, len: usize) -> &'a str {
130 let (head, tail) = self.i.split_at(len);
131 self.i = tail;
132 head
133 }
134
135 fn trim(&mut self, f: impl FnMut(char) -> bool) {
136 self.i = self.i.trim_start_matches(f);
137 }
138
139 fn consumed(&mut self, skip: usize, f: impl FnOnce(&mut Self)) -> &'a str {
140 self.with_consumed(|l| {
141 l.i = &l.i[skip..];
142 f(l);
143 })
144 .0
145 }
146
147 fn with_consumed<T>(&mut self, f: impl FnOnce(&mut Self) -> T) -> (&'a str, T) {
148 let start = self.i;
149 let y = f(self);
150 (&start[..start.len() - self.i.len()], y)
151 }
152
153 fn space(&mut self) {
155 loop {
156 self.i = self.i.trim_start();
157 match self.i.strip_prefix('#') {
158 Some(comment) => self.i = comment,
159 None => break,
160 }
161 loop {
163 let (before, after) = self.i.split_once('\n').unwrap_or((self.i, ""));
164 let before = before.strip_suffix('\r').unwrap_or(before);
165 self.i = after;
166 if before.chars().rev().take_while(|c| *c == '\\').count() % 2 == 0 {
168 break;
169 }
170 }
171 }
172 }
173
174 fn mod_then_ident(&mut self) {
175 self.ident0();
176 if let Some(rest) = self.i.strip_prefix("::") {
177 self.i = rest.strip_prefix(['@', '$']).unwrap_or(rest);
178 self.ident1();
179 }
180 }
181
182 fn ident0(&mut self) {
184 self.trim(|c: char| c.is_ascii_alphanumeric() || c == '_');
185 }
186
187 fn ident1(&mut self) {
189 let first = |c: char| c.is_ascii_alphabetic() || c == '_';
190 if let Some(rest) = self.i.strip_prefix(first) {
191 self.i = rest;
192 self.ident0();
193 } else {
194 self.e.push((Expect::Ident, self.i));
195 }
196 }
197
198 fn digits1(&mut self) {
200 if let Some(rest) = self.i.strip_prefix(|c: char| c.is_ascii_digit()) {
201 self.i = rest.trim_start_matches(|c: char| c.is_ascii_digit());
202 } else {
203 self.e.push((Expect::Digit, self.i));
204 }
205 }
206
207 fn num(&mut self) {
209 self.trim(|c| c.is_ascii_digit());
210 if let Some(i) = self.i.strip_prefix('.') {
211 self.i = i;
212 self.digits1();
213 }
214 if let Some(i) = self.i.strip_prefix(['e', 'E']) {
215 self.i = i.strip_prefix(['+', '-']).unwrap_or(i);
216 self.digits1();
217 }
218 }
219
220 fn escape(&mut self) -> Option<StrPart<&'a str, Token<&'a str>>> {
221 let mut chars = self.i.chars();
222 let part = match chars.next() {
223 Some(c @ ('\\' | '/' | '"')) => StrPart::Char(c),
224 Some('b') => StrPart::Char('\x08'),
225 Some('f') => StrPart::Char('\x0C'),
226 Some('n') => StrPart::Char('\n'),
227 Some('r') => StrPart::Char('\r'),
228 Some('t') => StrPart::Char('\t'),
229 Some('u') => {
230 let err_at = |lex: &mut Self, pos| {
231 lex.i = pos;
232 lex.e.push((Expect::Unicode, lex.i));
233 None
234 };
235 let mut hex = 0;
236 let start_i = chars.as_str();
237 for _ in 0..4 {
238 let cur_i = chars.as_str();
239 if let Some(digit) = chars.next().and_then(|c| c.to_digit(16)) {
240 hex = (hex << 4) + digit;
241 } else {
242 return err_at(self, cur_i);
243 }
244 }
245 match char::from_u32(hex) {
246 None => return err_at(self, start_i),
247 Some(c) => StrPart::Char(c),
248 }
249 }
250 Some('(') => {
251 let (full, block) = self.with_consumed(Self::block);
252 return Some(StrPart::Term(Token(full, block)));
253 }
254 Some(_) | None => {
255 self.e.push((Expect::Escape, self.i));
256 return None;
257 }
258 };
259
260 self.i = chars.as_str();
261 Some(part)
262 }
263
264 fn str(&mut self) -> Tok<&'a str> {
268 let start = self.take(1);
269 assert_eq!(start, "\"");
270 let mut parts = Vec::new();
271
272 loop {
273 let s = self.consumed(0, |lex| lex.trim(|c| c != '\\' && c != '"'));
274 if !s.is_empty() {
275 parts.push(StrPart::Str(s));
276 }
277 match self.next() {
278 Some('"') => break,
279 Some('\\') => self.escape().map(|part| parts.push(part)),
280 Some(_) => unreachable!(),
282 None => {
283 self.e.push((Expect::Delim(start), self.i));
284 break;
285 }
286 };
287 }
288 Tok::Str(parts)
289 }
290
291 fn token(&mut self) -> Option<Token<&'a str>> {
292 self.space();
293
294 let hd_op = |c| "|=!<>+-*/%".contains(c);
295 let tl_op = |c| hd_op(c) && c != '-';
296
297 let mut chars = self.i.chars();
298 let (s, tok) = match chars.next()? {
299 'a'..='z' | 'A'..='Z' | '_' => (self.consumed(1, Self::mod_then_ident), Tok::Word),
300 '$' => (self.consumed(1, Self::ident1), Tok::Var),
301 '@' => (self.consumed(1, Self::ident1), Tok::Fmt),
302 '0'..='9' => (self.consumed(1, Self::num), Tok::Num),
303 c if hd_op(c) => (self.consumed(1, |lex| lex.trim(tl_op)), Tok::Sym),
304 '.' => match chars.next() {
305 Some('.') => (self.take(2), Tok::Sym),
306 Some('a'..='z' | 'A'..='Z' | '_') => (self.consumed(2, Self::ident0), Tok::Sym),
307 _ => (self.take(1), Tok::Sym),
308 },
309 ':' | ';' | ',' | '?' => (self.take(1), Tok::Sym),
310 '"' => self.with_consumed(Self::str),
311 '(' | '[' | '{' => self.with_consumed(Self::block),
312 _ => return None,
313 };
314 Some(Token(s, tok))
315 }
316
317 fn tokens(&mut self) -> Vec<Token<&'a str>> {
318 core::iter::from_fn(|| self.token()).collect()
319 }
320
321 fn block(&mut self) -> Tok<&'a str> {
325 let open = self.take(1);
326 let close = match open {
327 "(" => ')',
328 "[" => ']',
329 "{" => '}',
330 _ => panic!(),
331 };
332 let mut tokens = self.tokens();
333
334 self.space();
335 if let Some(rest) = self.i.strip_prefix(close) {
336 tokens.push(Token(&self.i[..1], Tok::Sym));
337 self.i = rest;
338 } else {
339 self.e.push((Expect::Delim(open), self.i));
340 }
341 Tok::Block(tokens)
342 }
343}
344
345impl<'a> Token<&'a str> {
346 pub fn opt_as_str(found: Option<&Self>, code: &'a str) -> &'a str {
350 found.map_or(&code[code.len()..], |found| found.as_str())
351 }
352
353 pub fn as_str(&self) -> &'a str {
355 self.0
356 }
357}