1use alloc::vec::Vec;
4
/// One component of a string literal, as produced by the lexer.
///
/// `S` is the type of plain string slices and
/// `F` is the type of an interpolated filter.
#[derive(Debug)]
pub enum StrPart<S, F> {
    /// A run of characters containing neither a backslash nor a double quote.
    Str(S),
    /// An interpolated filter, introduced by `\(`.
    Filter(F),
    /// A single character resulting from an escape sequence such as `\n` or `\u0041`.
    Char(char),
}
18
19#[derive(Debug)]
21pub enum Token<S> {
22 Word(S),
24 Num(S),
26 Str(S, Vec<StrPart<S, Self>>),
28 Op(S),
32 Char(S),
34 Block(S, Vec<Self>),
36}
37
/// What the lexer expected to find at the position where an error occurred.
#[derive(Clone, Debug)]
pub enum Expect<S> {
    /// A decimal digit, e.g. after the `.` or the exponent marker of a number.
    Digit,
    /// An identifier, e.g. after `$`, `@`, or `::`.
    Ident,
    /// The counterpart of the given *opening* delimiter (`(`, `[`, `{`, or `"`).
    Delim(S),
    /// A valid escape sequence after a backslash inside a string.
    Escape,
    /// Four hexadecimal digits after `\u` inside a string.
    Unicode,
    /// A token; reported when input remains that does not start any token.
    Token,
}
56
57impl<'a> Expect<&'a str> {
58 pub fn as_str(&self) -> &'static str {
60 match self {
61 Self::Digit => "digit",
62 Self::Ident => "identifier",
63 Self::Delim("(") => "closing parenthesis",
64 Self::Delim("[") => "closing bracket",
65 Self::Delim("{") => "closing brace",
66 Self::Delim("\"") => "closing quote",
67 Self::Delim(_) => panic!(),
68 Self::Escape => "string escape sequence",
69 Self::Unicode => "4-digit hexadecimal UTF-8 code point",
70 Self::Token => "token",
71 }
72 }
73}
74
75pub type Error<S> = (Expect<S>, S);
77
78pub struct Lexer<S> {
80 i: S,
81 e: Vec<Error<S>>,
82}
83
84impl<'a> Lexer<&'a str> {
85 #[must_use]
87 pub fn new(i: &'a str) -> Self {
88 let e = Vec::new();
89 Self { i, e }
90 }
91
92 pub fn lex(mut self) -> Result<Vec<Token<&'a str>>, Vec<Error<&'a str>>> {
94 let tokens = self.tokens();
95 self.space();
96 if !self.i.is_empty() {
97 self.e.push((Expect::Token, self.i));
98 }
99
100 if self.e.is_empty() {
101 Ok(tokens)
102 } else {
103 Err(self.e)
104 }
105 }
106
107 fn next(&mut self) -> Option<char> {
108 let mut chars = self.i.chars();
109 let c = chars.next()?;
110 self.i = chars.as_str();
111 Some(c)
112 }
113
114 fn take(&mut self, len: usize) -> &'a str {
115 let (head, tail) = self.i.split_at(len);
116 self.i = tail;
117 head
118 }
119
120 fn trim(&mut self, f: impl FnMut(char) -> bool) {
121 self.i = self.i.trim_start_matches(f);
122 }
123
124 fn consumed(&mut self, skip: usize, f: impl FnOnce(&mut Self)) -> &'a str {
125 self.with_consumed(|l| {
126 l.i = &l.i[skip..];
127 f(l)
128 })
129 .0
130 }
131
132 fn with_consumed<T>(&mut self, f: impl FnOnce(&mut Self) -> T) -> (&'a str, T) {
133 let start = self.i;
134 let y = f(self);
135 (&start[..start.len() - self.i.len()], y)
136 }
137
138 fn space(&mut self) {
140 self.i = self.i.trim_start();
141 while let Some(comment) = self.i.strip_prefix('#') {
142 self.i = comment.trim_start_matches(|c| c != '\n').trim_start();
143 }
144 }
145
146 fn mod_then_ident(&mut self) {
147 self.ident0();
148 if let Some(rest) = self.i.strip_prefix("::") {
149 self.i = rest.strip_prefix(['@', '$']).unwrap_or(rest);
150 self.ident1();
151 }
152 }
153
154 fn ident0(&mut self) {
156 self.trim(|c: char| c.is_ascii_alphanumeric() || c == '_');
157 }
158
159 fn ident1(&mut self) {
161 let first = |c: char| c.is_ascii_alphabetic() || c == '_';
162 if let Some(rest) = self.i.strip_prefix(first) {
163 self.i = rest;
164 self.ident0();
165 } else {
166 self.e.push((Expect::Ident, self.i));
167 }
168 }
169
170 fn digits1(&mut self) {
172 if let Some(rest) = self.i.strip_prefix(|c: char| c.is_ascii_digit()) {
173 self.i = rest.trim_start_matches(|c: char| c.is_ascii_digit());
174 } else {
175 self.e.push((Expect::Digit, self.i));
176 }
177 }
178
179 fn num(&mut self) {
181 self.trim(|c| c.is_ascii_digit());
182 if let Some(i) = self.i.strip_prefix('.') {
183 self.i = i;
184 self.digits1();
185 }
186 if let Some(i) = self.i.strip_prefix(['e', 'E']) {
187 self.i = i.strip_prefix(['+', '-']).unwrap_or(i);
188 self.digits1();
189 }
190 }
191
192 fn escape(&mut self) -> Option<StrPart<&'a str, Token<&'a str>>> {
193 let mut chars = self.i.chars();
194 let part = match chars.next() {
195 Some(c @ ('\\' | '/' | '"')) => StrPart::Char(c),
196 Some('b') => StrPart::Char('\x08'),
197 Some('f') => StrPart::Char('\x0C'),
198 Some('n') => StrPart::Char('\n'),
199 Some('r') => StrPart::Char('\r'),
200 Some('t') => StrPart::Char('\t'),
201 Some('u') => {
202 let mut hex = 0;
203 for _ in 0..4 {
204 let i = chars.as_str();
205 match chars.next().and_then(|c| c.to_digit(16)) {
206 Some(digit) => hex = (hex << 4) + digit,
207 None => {
208 self.i = i;
209 self.e.push((Expect::Unicode, self.i));
210 return None;
211 }
212 }
213 }
214 StrPart::Char(char::from_u32(hex).unwrap())
215 }
216 Some('(') => {
217 let (full, tokens) = self.with_consumed(Self::delim);
218 return Some(StrPart::Filter(Token::Block(full, tokens)));
219 }
220 Some(_) | None => {
221 self.e.push((Expect::Escape, self.i));
222 return None;
223 }
224 };
225
226 self.i = chars.as_str();
227 Some(part)
228 }
229
230 fn str(&mut self) -> Vec<StrPart<&'a str, Token<&'a str>>> {
234 let start = self.take(1);
235 assert_eq!(start, "\"");
236 let mut parts = Vec::new();
237
238 loop {
239 let s = self.consumed(0, |lex| lex.trim(|c| c != '\\' && c != '"'));
240 if !s.is_empty() {
241 parts.push(StrPart::Str(s));
242 }
243 match self.next() {
244 Some('"') => return parts,
245 Some('\\') => self.escape().map(|part| parts.push(part)),
246 Some(_) => unreachable!(),
248 None => {
249 self.e.push((Expect::Delim(start), self.i));
250 return parts;
251 }
252 };
253 }
254 }
255
256 fn token(&mut self) -> Option<Token<&'a str>> {
257 self.space();
258
259 let is_op = |c| "|=!<>+-*/%".contains(c);
260
261 let mut chars = self.i.chars();
262 Some(match chars.next()? {
263 'a'..='z' | 'A'..='Z' | '_' => Token::Word(self.consumed(1, Self::mod_then_ident)),
264 '$' | '@' => Token::Word(self.consumed(1, Self::ident1)),
265 '0'..='9' => Token::Num(self.consumed(1, Self::num)),
266 c if is_op(c) => Token::Op(self.consumed(1, |lex| lex.trim(is_op))),
267 '.' => match chars.next() {
268 Some('.') => Token::Char(self.take(2)),
269 Some('a'..='z' | 'A'..='Z' | '_') => Token::Char(self.consumed(2, Self::ident0)),
270 _ => Token::Char(self.take(1)),
271 },
272 ':' | ';' | ',' | '?' => Token::Char(self.take(1)),
273 '"' => {
274 let (full, parts) = self.with_consumed(Self::str);
275 Token::Str(full, parts)
276 }
277 '(' | '[' | '{' => {
278 let (full, tokens) = self.with_consumed(Self::delim);
279 Token::Block(full, tokens)
280 }
281 _ => return None,
282 })
283 }
284
285 fn tokens(&mut self) -> Vec<Token<&'a str>> {
286 core::iter::from_fn(|| self.token()).collect()
287 }
288
289 fn delim(&mut self) -> Vec<Token<&'a str>> {
293 let open = self.take(1);
294 let close = match open {
295 "(" => ')',
296 "[" => ']',
297 "{" => '}',
298 _ => panic!(),
299 };
300 let mut tokens = self.tokens();
301
302 self.space();
303 if let Some(rest) = self.i.strip_prefix(close) {
304 tokens.push(Token::Char(&self.i[..1]));
305 self.i = rest;
306 } else {
307 self.e.push((Expect::Delim(open), self.i));
308 }
309 tokens
310 }
311}
312
313impl<'a> Token<&'a str> {
314 pub fn opt_as_str(found: Option<&Self>, code: &'a str) -> &'a str {
318 found.map_or(&code[code.len()..], |found| found.as_str())
319 }
320
321 pub fn as_str(&self) -> &'a str {
323 match self {
324 Self::Word(s) | Self::Char(s) | Self::Op(s) | Self::Num(s) => s,
325 Self::Str(s, _) | Self::Block(s, _) => s,
326 }
327 }
328
329 pub fn span(&self, code: &str) -> crate::Span {
331 span(code, self.as_str())
332 }
333}
334
335pub fn span(whole: &str, part: &str) -> crate::Span {
339 let start = part.as_ptr() as usize - whole.as_ptr() as usize;
340 start..start + part.len()
341}