use alloc::vec::Vec;

/// Component of a string that may contain escape sequences.
///
/// `S` is the type of literal string slices and
/// `T` is the type of interpolated terms.
#[derive(Debug)]
pub enum StrPart<S, T> {
    /// Literal text that contains no escape sequence.
    Str(S),
    /// Interpolated term, written `\(...)` inside the string.
    Term(T),
    /// Single character produced by an escape sequence such as `\n` or `\u0041`.
    Char(char),
}

19#[derive(Debug)]
21pub struct Token<S>(pub(crate) S, pub(crate) Tok<S>);
22
23#[derive(Debug)]
24pub(crate) enum Tok<S> {
25 Word,
27 Var,
29 Fmt,
31 Num,
33 Str(Vec<StrPart<S, Token<S>>>),
35 Sym,
37 Block(Vec<Token<S>>),
39}
40
/// Kind of input that the lexer expected but did not find.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub enum Expect<S> {
    /// A digit was expected, i.e. after a decimal point or an exponent marker.
    Digit,
    /// An identifier was expected, i.e. after `$`, `@` or `::`.
    Ident,
    /// A closing delimiter was expected; the payload is the *opening*
    /// delimiter (`(`, `[`, `{` or `"`) that was never closed.
    Delim(S),
    /// A valid character was expected after a backslash inside a string.
    Escape,
    /// Four hexadecimal digits were expected after `\u` inside a string.
    Unicode,
    /// A token was expected, but the remaining input does not start one.
    Token,
}

61impl Expect<&str> {
62 pub fn as_str(&self) -> &'static str {
64 match self {
65 Self::Digit => "digit",
66 Self::Ident => "identifier",
67 Self::Delim("(") => "closing parenthesis",
68 Self::Delim("[") => "closing bracket",
69 Self::Delim("{") => "closing brace",
70 Self::Delim("\"") => "closing quote",
71 Self::Delim(_) => panic!(),
72 Self::Escape => "string escape sequence",
73 Self::Unicode => "4-digit hexadecimal UTF-8 code point",
74 Self::Token => "token",
75 }
76 }
77}
78
79pub type Error<S> = (Expect<S>, S);
81
82pub struct Lexer<S> {
84 i: S,
85 e: Vec<Error<S>>,
86}
87
88impl<'a> Lexer<&'a str> {
89 #[must_use]
91 pub fn new(i: &'a str) -> Self {
92 let e = Vec::new();
93 Self { i, e }
94 }
95
96 pub fn lex(mut self) -> Result<Vec<Token<&'a str>>, Vec<Error<&'a str>>> {
98 let tokens = self.tokens();
99 self.space();
100 if !self.i.is_empty() {
101 self.e.push((Expect::Token, self.i));
102 }
103
104 if self.e.is_empty() {
105 Ok(tokens)
106 } else {
107 Err(self.e)
108 }
109 }
110
111 fn next(&mut self) -> Option<char> {
112 let mut chars = self.i.chars();
113 let c = chars.next()?;
114 self.i = chars.as_str();
115 Some(c)
116 }
117
118 fn take(&mut self, len: usize) -> &'a str {
119 let (head, tail) = self.i.split_at(len);
120 self.i = tail;
121 head
122 }
123
124 fn trim(&mut self, f: impl FnMut(char) -> bool) {
125 self.i = self.i.trim_start_matches(f);
126 }
127
128 fn consumed(&mut self, skip: usize, f: impl FnOnce(&mut Self)) -> &'a str {
129 self.with_consumed(|l| {
130 l.i = &l.i[skip..];
131 f(l);
132 })
133 .0
134 }
135
136 fn with_consumed<T>(&mut self, f: impl FnOnce(&mut Self) -> T) -> (&'a str, T) {
137 let start = self.i;
138 let y = f(self);
139 (&start[..start.len() - self.i.len()], y)
140 }
141
142 fn space(&mut self) {
144 loop {
145 self.i = self.i.trim_start();
146 match self.i.strip_prefix('#') {
147 Some(comment) => self.i = comment,
148 None => break,
149 }
150 loop {
152 let (before, after) = self.i.split_once('\n').unwrap_or((self.i, ""));
153 let before = before.strip_suffix('\r').unwrap_or(before);
154 self.i = after;
155 if before.chars().rev().take_while(|c| *c == '\\').count() % 2 == 0 {
157 break;
158 }
159 }
160 }
161 }
162
163 fn mod_then_ident(&mut self) {
164 self.ident0();
165 if let Some(rest) = self.i.strip_prefix("::") {
166 self.i = rest.strip_prefix(['@', '$']).unwrap_or(rest);
167 self.ident1();
168 }
169 }
170
171 fn ident0(&mut self) {
173 self.trim(|c: char| c.is_ascii_alphanumeric() || c == '_');
174 }
175
176 fn ident1(&mut self) {
178 let first = |c: char| c.is_ascii_alphabetic() || c == '_';
179 if let Some(rest) = self.i.strip_prefix(first) {
180 self.i = rest;
181 self.ident0();
182 } else {
183 self.e.push((Expect::Ident, self.i));
184 }
185 }
186
187 fn digits1(&mut self) {
189 if let Some(rest) = self.i.strip_prefix(|c: char| c.is_ascii_digit()) {
190 self.i = rest.trim_start_matches(|c: char| c.is_ascii_digit());
191 } else {
192 self.e.push((Expect::Digit, self.i));
193 }
194 }
195
196 fn num(&mut self) {
198 self.trim(|c| c.is_ascii_digit());
199 if let Some(i) = self.i.strip_prefix('.') {
200 self.i = i;
201 self.digits1();
202 }
203 if let Some(i) = self.i.strip_prefix(['e', 'E']) {
204 self.i = i.strip_prefix(['+', '-']).unwrap_or(i);
205 self.digits1();
206 }
207 }
208
209 fn escape(&mut self) -> Option<StrPart<&'a str, Token<&'a str>>> {
210 let mut chars = self.i.chars();
211 let part = match chars.next() {
212 Some(c @ ('\\' | '/' | '"')) => StrPart::Char(c),
213 Some('b') => StrPart::Char('\x08'),
214 Some('f') => StrPart::Char('\x0C'),
215 Some('n') => StrPart::Char('\n'),
216 Some('r') => StrPart::Char('\r'),
217 Some('t') => StrPart::Char('\t'),
218 Some('u') => {
219 let mut hex = 0;
220 for _ in 0..4 {
221 let i = chars.as_str();
222 if let Some(digit) = chars.next().and_then(|c| c.to_digit(16)) {
223 hex = (hex << 4) + digit;
224 } else {
225 self.i = i;
226 self.e.push((Expect::Unicode, self.i));
227 return None;
228 }
229 }
230 StrPart::Char(char::from_u32(hex).unwrap())
231 }
232 Some('(') => {
233 let (full, block) = self.with_consumed(Self::block);
234 return Some(StrPart::Term(Token(full, block)));
235 }
236 Some(_) | None => {
237 self.e.push((Expect::Escape, self.i));
238 return None;
239 }
240 };
241
242 self.i = chars.as_str();
243 Some(part)
244 }
245
246 fn str(&mut self) -> Tok<&'a str> {
250 let start = self.take(1);
251 assert_eq!(start, "\"");
252 let mut parts = Vec::new();
253
254 loop {
255 let s = self.consumed(0, |lex| lex.trim(|c| c != '\\' && c != '"'));
256 if !s.is_empty() {
257 parts.push(StrPart::Str(s));
258 }
259 match self.next() {
260 Some('"') => break,
261 Some('\\') => self.escape().map(|part| parts.push(part)),
262 Some(_) => unreachable!(),
264 None => {
265 self.e.push((Expect::Delim(start), self.i));
266 break;
267 }
268 };
269 }
270 Tok::Str(parts)
271 }
272
273 fn token(&mut self) -> Option<Token<&'a str>> {
274 self.space();
275
276 let is_op = |c| "|=!<>+-*/%".contains(c);
277
278 let mut chars = self.i.chars();
279 let (s, tok) = match chars.next()? {
280 'a'..='z' | 'A'..='Z' | '_' => (self.consumed(1, Self::mod_then_ident), Tok::Word),
281 '$' => (self.consumed(1, Self::ident1), Tok::Var),
282 '@' => (self.consumed(1, Self::ident1), Tok::Fmt),
283 '0'..='9' => (self.consumed(1, Self::num), Tok::Num),
284 c if is_op(c) => (self.consumed(1, |lex| lex.trim(is_op)), Tok::Sym),
285 '.' => match chars.next() {
286 Some('.') => (self.take(2), Tok::Sym),
287 Some('a'..='z' | 'A'..='Z' | '_') => (self.consumed(2, Self::ident0), Tok::Sym),
288 _ => (self.take(1), Tok::Sym),
289 },
290 ':' | ';' | ',' | '?' => (self.take(1), Tok::Sym),
291 '"' => self.with_consumed(Self::str),
292 '(' | '[' | '{' => self.with_consumed(Self::block),
293 _ => return None,
294 };
295 Some(Token(s, tok))
296 }
297
298 fn tokens(&mut self) -> Vec<Token<&'a str>> {
299 core::iter::from_fn(|| self.token()).collect()
300 }
301
302 fn block(&mut self) -> Tok<&'a str> {
306 let open = self.take(1);
307 let close = match open {
308 "(" => ')',
309 "[" => ']',
310 "{" => '}',
311 _ => panic!(),
312 };
313 let mut tokens = self.tokens();
314
315 self.space();
316 if let Some(rest) = self.i.strip_prefix(close) {
317 tokens.push(Token(&self.i[..1], Tok::Sym));
318 self.i = rest;
319 } else {
320 self.e.push((Expect::Delim(open), self.i));
321 }
322 Tok::Block(tokens)
323 }
324}
325
326impl<'a> Token<&'a str> {
327 pub fn opt_as_str(found: Option<&Self>, code: &'a str) -> &'a str {
331 found.map_or(&code[code.len()..], |found| found.as_str())
332 }
333
334 pub fn as_str(&self) -> &'a str {
336 self.0
337 }
338}