1#![allow(clippy::should_implement_trait, clippy::type_complexity)]
2
3use std::iter::Peekable;
4use std::str::Chars;
5
6use unicode_width::UnicodeWidthChar;
7
/// An error reported by the lexer, together with the input position it refers to.
///
/// `E` is the payload type carried by [`LexerErrorKind::Custom`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexerError<E> {
    /// Position in the input that the error refers to. For the `InvalidToken` error built by
    /// `Lexer::backtrack` this is the start of the match that was in progress.
    pub location: Loc,
    /// What went wrong.
    pub kind: LexerErrorKind<E>,
}
13
/// The reason a [`LexerError`] was raised.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexerErrorKind<E> {
    /// Reported by `Lexer::backtrack` when it is asked to fall back to the last accepting match
    /// but none has been recorded.
    InvalidToken,

    /// An error value of the caller-chosen type `E`.
    ///
    /// NOTE(review): presumably produced by user-written semantic actions; nothing in this file
    /// constructs it — confirm against the code that drives the lexer.
    Custom(E),
}
22
/// A position in the lexer's input.
///
/// All three counters are zero-based; [`Loc::default`] and the private `Loc::ZERO` both denote
/// the very beginning of the input.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct Loc {
    /// Line number. The lexer bumps it each time it consumes a `'\n'`.
    pub line: u32,

    /// Column within the current line, reset to 0 after every `'\n'`.
    ///
    /// The lexer advances it by a character's Unicode display width (1 when the width is
    /// unknown) and by a fixed 4 for a tab, so it counts display columns rather than characters
    /// or bytes.
    pub col: u32,

    /// Byte offset into the input; advanced by the UTF-8 length of each consumed character.
    pub byte_idx: usize,
}

impl Loc {
    /// Start of input: line 0, column 0, byte 0.
    const ZERO: Self = Self { line: 0, col: 0, byte_idx: 0 };
}
43
/// Outcome of running a semantic action: either nothing was produced, or a value of type `T`.
///
/// NOTE(review): the variant names suggest that `Continue` tells the driver to keep lexing and
/// `Return` tells it to yield the value; the driver is not part of this file — confirm.
pub enum SemanticActionResult<T> {
    /// The action produced no value.
    Continue,
    /// The action produced the contained value.
    Return(T),
}

impl<T> SemanticActionResult<T> {
    /// Transforms the value inside `Return` with `f`, leaving `Continue` untouched.
    ///
    /// `f` is invoked at most once, so any `FnOnce` is accepted. This includes closures that
    /// move captured values into their result; every `Fn`/`FnMut` closure still works.
    pub fn map_token<F, T1>(self, f: F) -> SemanticActionResult<T1>
    where
        F: FnOnce(T) -> T1,
    {
        match self {
            SemanticActionResult::Continue => SemanticActionResult::Continue,
            SemanticActionResult::Return(t) => SemanticActionResult::Return(f(t)),
        }
    }
}
64
/// Lexer runtime state: the character source, the span of the match in progress, and the most
/// recent accepting match that `backtrack` can fall back to.
///
/// NOTE(review): the public `__`-prefixed fields look like they are meant to be touched by
/// generated code rather than by end users — confirm.
#[derive(Debug, Clone)]
pub struct Lexer<'input, Iter: Iterator<Item = char> + Clone, Token, State, Error, Wrapper> {
    // Current lexer state number. Starts at 0; `backtrack` puts it back to 0 when it reports an
    // invalid token.
    pub __state: usize,

    // Starts out `false` and is cleared again by `backtrack` when it restores a saved match.
    // NOTE(review): presumably set by the driver once end-of-input has been handled — confirm.
    pub __done: bool,

    // Starts at 0. NOTE(review): presumably the state the driver switches to after a match;
    // none of the methods visible here read or write it — confirm.
    pub __initial_state: usize,

    // Caller-supplied state, handed out mutably by `state()`.
    user_state: State,

    // Text that `match_` slices. It is the empty string for lexers built from a bare iterator.
    input: &'input str,

    // Set to the restored match end by `backtrack`; zero initially. Nothing visible in this
    // part of the file reads it.
    iter_loc: Loc,

    // Character source. `Peekable` backs `peek()`. The whole iterator is cloned by
    // `set_accepting_state` and swapped back in by `backtrack`.
    pub __iter: Peekable<Iter>,

    // Where the match in progress starts.
    current_match_start: Loc,

    // Where the match in progress currently ends; advanced by `next()`.
    current_match_end: Loc,

    // Snapshot taken by `set_accepting_state`, consumed by `backtrack`. In order:
    //
    // - start of the recorded match
    // - the character iterator as it was when the match was recorded
    // - the semantic action to run for that match
    // - end of the recorded match
    last_match: Option<(
        Loc,
        Peekable<Iter>,
        for<'lexer> fn(&'lexer mut Wrapper) -> SemanticActionResult<Result<Token, Error>>,
        Loc,
    )>,
}
110
111impl<I: Iterator<Item = char> + Clone, T, S: Default, E, W> Lexer<'static, I, T, S, E, W> {
112 pub fn new_from_iter(iter: I) -> Self {
113 Self::new_from_iter_with_state(iter, Default::default())
114 }
115}
116
117impl<I: Iterator<Item = char> + Clone, T, S, E, W> Lexer<'static, I, T, S, E, W> {
118 pub fn new_from_iter_with_state(iter: I, state: S) -> Self {
119 Self {
120 __state: 0,
121 __done: false,
122 __initial_state: 0,
123 user_state: state,
124 input: "",
125 iter_loc: Loc::ZERO,
126 __iter: iter.peekable(),
127 current_match_start: Loc::ZERO,
128 current_match_end: Loc::ZERO,
129 last_match: None,
130 }
131 }
132}
133
134impl<'input, T, S: Default, E, W> Lexer<'input, Chars<'input>, T, S, E, W> {
135 pub fn new(input: &'input str) -> Self {
136 Self::new_with_state(input, Default::default())
137 }
138}
139
140impl<'input, T, S, E, W> Lexer<'input, Chars<'input>, T, S, E, W> {
141 pub fn new_with_state(input: &'input str, state: S) -> Self {
142 Self {
143 __state: 0,
144 __done: false,
145 __initial_state: 0,
146 user_state: state,
147 input,
148 iter_loc: Loc::ZERO,
149 __iter: input.chars().peekable(),
150 current_match_start: Loc::ZERO,
151 current_match_end: Loc::ZERO,
152 last_match: None,
153 }
154 }
155}
156
157impl<'input, I: Iterator<Item = char> + Clone, T, S, E, W> Lexer<'input, I, T, S, E, W> {
158 pub fn next(&mut self) -> Option<char> {
160 match self.__iter.next() {
161 None => None,
162 Some(char) => {
163 self.current_match_end.byte_idx += char.len_utf8();
164 if char == '\n' {
165 self.current_match_end.line += 1;
166 self.current_match_end.col = 0;
167 } else if char == '\t' {
168 self.current_match_end.col += 4; } else {
170 self.current_match_end.col += UnicodeWidthChar::width(char).unwrap_or(1) as u32;
171 }
172 Some(char)
173 }
174 }
175 }
176
177 pub fn peek(&mut self) -> Option<char> {
178 self.__iter.peek().copied()
179 }
180
181 pub fn backtrack(
183 &mut self,
184 ) -> Result<for<'lexer> fn(&'lexer mut W) -> SemanticActionResult<Result<T, E>>, LexerError<E>>
185 {
186 match self.last_match.take() {
187 None => {
188 self.__state = 0;
189 Err(LexerError {
190 location: self.current_match_start,
191 kind: LexerErrorKind::InvalidToken,
192 })
193 }
194 Some((match_start, iter, semantic_action, match_end)) => {
195 self.__done = false;
196 self.current_match_start = match_start;
197 self.current_match_end = match_end;
198 self.__iter = iter;
199 self.iter_loc = match_end;
200 Ok(semantic_action)
201 }
202 }
203 }
204
205 pub fn reset_accepting_state(&mut self) {
206 self.last_match = None;
207 }
208
209 pub fn set_accepting_state(
210 &mut self,
211 semantic_action_fn: for<'lexer> fn(&'lexer mut W) -> SemanticActionResult<Result<T, E>>,
212 ) {
213 self.last_match = Some((
214 self.current_match_start,
215 self.__iter.clone(),
216 semantic_action_fn,
217 self.current_match_end,
218 ));
219 }
220
221 pub fn reset_match(&mut self) {
222 self.current_match_start = self.current_match_end;
223 }
224
225 pub fn match_(&self) -> &'input str {
226 &self.input[self.current_match_start.byte_idx..self.current_match_end.byte_idx]
227 }
228
229 pub fn match_loc(&self) -> (Loc, Loc) {
230 (self.current_match_start, self.current_match_end)
231 }
232
233 pub fn state(&mut self) -> &mut S {
234 &mut self.user_state
235 }
236}