1use std::mem;
14
/// Tokenizer that multiplexes over several input slices, emitting the tokens
/// of each slice in order with a synthetic `TokenKind::Break` at every
/// boundary between consecutive slices.
pub(crate) struct Tokenizer<'a> {
    // One sub-tokenizer per input slice, in original order.
    internal: Vec<InternalTokenizer<'a>>,
    // Index into `internal` of the tokenizer currently being drained.
    active: usize,
    // Set when `active` was just advanced past an exhausted tokenizer; while
    // set, the next token handed out is a `Break` marking the slice boundary.
    switched: bool,
}
20
/// Tokenizer over a single input slice.
struct InternalTokenizer<'a> {
    // Remaining unconsumed input; shrinks from the front as tokens are produced.
    input: &'a str,
    // One-token lookahead cache used by `peek`.
    next: Option<Token>,
    // Absolute end offset of this slice: its starting offset plus its length.
    // Together with `input.len()` this recovers the current absolute position.
    orig_len: usize,
    // How many `Eof` tokens have been handed out; producing a second one is
    // treated as a caller bug (see `actual_next_token`).
    returned_eof: usize,
}
27
/// A single lexed token: its classification plus the source span it covers.
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
pub struct Token {
    /// What kind of token this is.
    pub(crate) kind: TokenKind,
    /// The byte range of the input this token was lexed from.
    pub(crate) span: TokenSpan,
}
34
35impl Token {
36 #[must_use]
38 pub fn span(&self) -> TokenSpan {
39 self.span
40 }
41
42 #[must_use]
44 pub fn kind(&self) -> TokenKind {
45 self.kind
46 }
47}
48
/// Byte range of a token within the overall input.
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
pub struct TokenSpan {
    /// Inclusive start byte offset.
    pub(crate) start: usize,

    /// End byte offset, one past the last byte of the token; equal to
    /// `start` for zero-width tokens (`Eof`, `Break`).
    pub(crate) stop: usize,
}
60
61impl TokenSpan {
62 #[must_use]
64 pub fn start(&self) -> usize {
65 self.start
66 }
67
68 #[must_use]
70 pub fn stop(&self) -> usize {
71 self.stop
72 }
73}
74
75impl std::fmt::Display for Token {
76 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77 write!(f, "{}", self.kind)
78 }
79}
80
/// The kinds of token the tokenizer can produce.
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
pub enum TokenKind {
    /// The `AND` keyword.
    And,
    /// The `OR` keyword.
    Or,
    /// `(`
    ParenOpen,
    /// `)`
    ParenClose,
    /// `:`
    Colon,

    /// Any other single character.
    Char(char),

    /// Emitted when a tokenizer's input is exhausted.
    Eof,

    /// Zero-width marker emitted at the boundary between two input slices
    /// of the multiplexing `Tokenizer`.
    Break,
}
106impl std::fmt::Display for TokenKind {
107 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
108 match self {
109 TokenKind::Char(other) => write!(f, "Token::Char({other:?})"),
110 TokenKind::And
111 | TokenKind::Or
112 | TokenKind::ParenOpen
113 | TokenKind::ParenClose
114 | TokenKind::Colon
115 | TokenKind::Break
116 | TokenKind::Eof => {
117 write!(f, "Token::")?;
118 <Self as std::fmt::Debug>::fmt(self, f)
119 }
120 }
121 }
122}
123
impl<'a> Tokenizer<'a> {
    /// Builds a tokenizer from an iterator of input slices.
    ///
    /// Returns `None` if the iterator yields nothing. Each slice's absolute
    /// starting offset is `previous + index`: the combined length of all
    /// earlier slices plus one byte per boundary already crossed.
    /// NOTE(review): the `+ index` term assumes consecutive slices were
    /// separated by exactly one byte in the original text — confirm against
    /// the code performing the split.
    pub(crate) fn from_slice<T>(input: T) -> Option<Self>
    where
        T: Iterator<Item = &'a str>,
    {
        let mut internal = vec![];
        let mut previous = 0;
        for (index, split) in input.into_iter().enumerate() {
            internal.push(InternalTokenizer::new(split, previous + index));
            previous += split.len();
        }

        if internal.is_empty() {
            None
        } else {
            Some(Self {
                internal,
                active: 0,
                switched: false,
            })
        }
    }

    /// Returns the upcoming token without consuming it.
    ///
    /// If the active sub-tokenizer was just exhausted (`switched`), the
    /// upcoming token is the synthetic `Break` for the slice boundary.
    /// Note that `peek` deliberately leaves `switched` set, so a later
    /// `next_token` still emits the `Break`.
    pub(crate) fn peek(&mut self) -> Token {
        self.check_valid();

        if self.switched {
            self.return_break_token()
        } else {
            self.active_tokenizer().peek()
        }
    }

    /// Returns the next token and advances past it.
    ///
    /// A pending `Break` (scheduled by `check_valid` when switching slices)
    /// is emitted exactly once, before any tokens of the new slice.
    pub(crate) fn next_token(&mut self) -> Token {
        self.check_valid();
        if self.switched {
            let output = self.return_break_token();
            self.switched = false;
            return output;
        }
        self.active_tokenizer().next_token()
    }

    /// Builds the zero-width `Break` token separating two slices.
    ///
    /// Only called while `switched` is set, i.e. right after `active` was
    /// advanced, so `active - 1` is the tokenizer that just ran dry; the
    /// break is placed one byte past its final location (the separator byte).
    fn return_break_token(&mut self) -> Token {
        let previous_location = self
            .internal
            .get(self.active - 1)
            .expect("All indexes are valid")
            .get_location();
        Token {
            kind: TokenKind::Break,
            span: TokenSpan {
                start: previous_location + 1,
                stop: previous_location + 1,
            },
        }
    }

    /// Advances `active` past an exhausted sub-tokenizer.
    ///
    /// If the active tokenizer's next token is `Eof` and it is not the last
    /// one, move to the next tokenizer and flag `switched` so a `Break` is
    /// emitted first. The final tokenizer's `Eof` is left in place so it
    /// surfaces to the caller as the true end of input.
    fn check_valid(&mut self) {
        {
            let final_internal_index = self.internal.len() - 1;
            let current_index = self.active;
            let next_token = {
                self.internal
                    .get_mut(current_index)
                    .expect("All indexes are valid")
                    .peek()
            };
            if next_token.kind == TokenKind::Eof && current_index != final_internal_index {
                self.active += 1;
                self.switched = true;
            }
        }
    }

    /// Mutable access to the sub-tokenizer currently being drained.
    fn active_tokenizer(&mut self) -> &mut InternalTokenizer<'a> {
        self.internal
            .get_mut(self.active)
            .expect("This should always be a valid index.")
    }
}
206
207impl<'a> InternalTokenizer<'a> {
208 fn new(input: &'a str, previous: usize) -> Self {
209 Self {
210 orig_len: input.len() + previous,
211 input,
212 next: None,
213 returned_eof: 0,
214 }
215 }
216
217 fn get_location(&self) -> usize {
218 self.orig_len - self.input.len()
219 }
220
221 fn next_token(&mut self) -> Token {
222 self.populate();
223 mem::take(&mut self.next).expect("`self.next` should be some.")
224 }
225
226 fn peek(&mut self) -> Token {
227 if self.next.is_none() {
228 self.populate();
229 }
230 self.next.expect("Is some")
231 }
232
233 fn actual_next_token(&mut self) -> Token {
234 if self.input.is_empty() {
235 self.returned_eof += 1;
236
237 assert!(
238 (self.returned_eof < 2),
239 "BUG: Tried to drain this tokenizer over EOF for {} times.",
240 self.returned_eof
241 );
242
243 return Token {
244 kind: TokenKind::Eof,
245 span: TokenSpan {
246 start: self.get_location(),
247 stop: self.get_location(),
248 },
249 };
250 }
251
252 let (token, size) = match self.next() {
253 'A' => self.tokenize_and(),
254 'O' => self.tokenize_or(),
255 '(' => (TokenKind::ParenOpen, 1),
256 ')' => (TokenKind::ParenClose, 1),
257 ':' => (TokenKind::Colon, 1),
258 other => (TokenKind::Char(other), other.len_utf8()),
259 };
260
261 let current_location = self.get_location();
262 self.chomp(size);
263
264 Token {
265 kind: token,
266 span: TokenSpan {
267 start: current_location,
268 stop: self.get_location(),
269 },
270 }
271 }
272
273 fn populate(&mut self) {
274 if self.next.is_none() {
275 let next = self.actual_next_token();
276 self.next = Some(next);
277 }
278 }
279
280 fn chomp(&mut self, number: usize) {
281 self.input = &self.input[number..];
282 }
283
284 fn take(&self, number: usize) -> &str {
285 &self.input[0..number]
286 }
287
288 fn next(&self) -> char {
289 self.input.chars().next().expect("Is not empty")
290 }
291
292 fn tokenize_and(&self) -> (TokenKind, usize) {
293 if self.take(3) == "AND" {
294 (TokenKind::And, 3)
295 } else {
296 (TokenKind::Char(self.next()), 1)
297 }
298 }
299
300 fn tokenize_or(&self) -> (TokenKind, usize) {
301 if self.take(2) == "OR" {
302 (TokenKind::Or, 2)
303 } else {
304 (TokenKind::Char(self.next()), 1)
305 }
306 }
307}