1use crate::{context::Context, input::Input, location::Location, log, parser::State};
2#[cfg(debug_assertions)]
3use colored::*;
4use core::fmt::Debug;
5use std::marker::PhantomData;
6
/// Produces tokens from the input on demand, driven by the parser.
///
/// Type parameters: `C` is the parsing context, `S` the parser state,
/// `TK` the token-kind type.
pub trait Lexer<'i, C, S, TK>
where
    C: Context<'i, Self::Input, S, TK>,
    S: State,
{
    /// The input type this lexer operates on (e.g. `str`).
    type Input: Input + ?Sized;

    /// Returns a lazy iterator over tokens recognized at the context's
    /// current position, restricted to `expected_tokens`.
    ///
    /// Each pair is `(token kind, finish flag)`; the flag, when `true`,
    /// stops further recognition attempts after that token matches.
    fn next_tokens(
        &self,
        context: &mut C,
        input: &'i Self::Input,
        expected_tokens: Vec<(TK, bool)>,
    ) -> Box<dyn Iterator<Item = Token<'i, Self::Input, TK>> + 'i>;
}
45
/// Recognizes a single token kind at the start of a `str` input.
pub trait TokenRecognizer<'i> {
    /// Returns the matched prefix of `input`, or `None` if there is no match.
    ///
    /// The default implementation panics; implementors working over `str`
    /// input are expected to override it.
    fn recognize(&self, _input: &'i str) -> Option<&'i str> {
        panic!("Recognize is not defined.")
    }
}
53
/// A lexer over `str` input backed by a static table of token recognizers,
/// one per terminal, optionally skipping leading whitespace.
pub struct StringLexer<C, S, TK, TR: 'static, const TERMINAL_COUNT: usize> {
    // When true, whitespace preceding a token is consumed and reported
    // to the context as layout.
    skip_ws: bool,
    // One recognizer per terminal; indexed by token kind (see `next_tokens`).
    token_recognizers: &'static [TR; TERMINAL_COUNT],
    // Ties otherwise-unused type parameters to this struct.
    phantom: PhantomData<(C, S, TK)>,
}
61
62impl<
63 'i,
64 C: Context<'i, str, S, TK>,
65 S: State,
66 TK,
67 TR: TokenRecognizer<'i>,
68 const TERMINAL_COUNT: usize,
69 > StringLexer<C, S, TK, TR, TERMINAL_COUNT>
70{
71 pub fn new(skip_ws: bool, token_recognizers: &'static [TR; TERMINAL_COUNT]) -> Self {
72 Self {
73 skip_ws,
74 token_recognizers,
75 phantom: PhantomData,
76 }
77 }
78
79 fn skip(input: &'i str, context: &mut C) {
80 let skipped_len: usize = input[context.position()..]
81 .chars()
82 .take_while(|x| x.is_whitespace())
83 .map(|c| c.len_utf8())
84 .sum();
85 if skipped_len > 0 {
86 let skipped = &input[context.position()..context.position() + skipped_len];
87 log!("\t{} {}", "Skipped ws:".bold().green(), skipped_len);
88 context.set_layout_ahead(Some(skipped));
89 context.set_position(context.position() + skipped_len);
90 context.set_location(skipped.location_after(context.location()));
91 } else {
92 context.set_layout_ahead(None);
93 }
94 }
95}
96
/// Lazily tries each supplied recognizer against the input at a fixed
/// position, yielding one `Token` per successful match.
struct TokenIterator<'i, TR: 'static, TK> {
    input: &'i str,
    // Byte offset in `input` where recognition starts.
    position: usize,
    // Source location corresponding to `position`.
    location: Location,
    // `(recognizer, token kind, finish flag)` triples, tried in order.
    token_recognizers: Vec<(&'static TR, TK, bool)>,
    // Index of the next recognizer to try.
    index: usize,
    // Set once a recognizer with the finish flag matches; ends iteration.
    finish: bool,
}
105
106impl<'i, TR, TK> TokenIterator<'i, TR, TK> {
107 fn new(
108 input: &'i str,
109 position: usize,
110 location: Location,
111 token_recognizers: Vec<(&'static TR, TK, bool)>,
112 ) -> Self {
113 Self {
114 input,
115 position,
116 location,
117 token_recognizers,
118 index: 0,
119 finish: false,
120 }
121 }
122}
123
124impl<'i, TK, TR> Iterator for TokenIterator<'i, TR, TK>
125where
126 TR: TokenRecognizer<'i>,
127 TK: Copy,
128{
129 type Item = Token<'i, str, TK>;
130
131 fn next(&mut self) -> Option<Self::Item> {
132 loop {
133 if !self.finish && self.index < self.token_recognizers.len() {
134 let (recognizer, token_kind, finish) = &self.token_recognizers[self.index];
135 self.index += 1;
136 if let Some(recognized) = recognizer.recognize(&self.input[self.position..]) {
137 self.finish = *finish;
138 return Some(Token {
139 kind: *token_kind,
140 value: recognized,
141 location: recognized.location_span(self.location),
142 });
143 }
144 } else {
145 return None;
146 }
147 }
148 }
149}
150
151impl<'i, C, S, TK, TR, const TERMINAL_COUNT: usize> Lexer<'i, C, S, TK>
152 for StringLexer<C, S, TK, TR, TERMINAL_COUNT>
153where
154 C: Context<'i, str, S, TK>,
155 S: State + Into<usize>,
156 TK: Debug + Into<usize> + Copy + 'i,
157 TR: TokenRecognizer<'i>,
158{
159 type Input = str;
160
161 fn next_tokens(
162 &self,
163 context: &mut C,
164 input: &'i Self::Input,
165 expected_tokens: Vec<(TK, bool)>,
166 ) -> Box<dyn Iterator<Item = Token<'i, Self::Input, TK>> + 'i> {
167 if self.skip_ws {
168 Self::skip(input, context);
169 }
170 log!(" {} {:?}", "Trying recognizers:".green(), expected_tokens);
171
172 dbg!(context.position(), context.location());
173
174 Box::new(TokenIterator::new(
175 input,
176 context.position(),
177 context.location(),
178 expected_tokens
179 .iter()
180 .map(|&tok| (&self.token_recognizers[tok.0.into()], tok.0, tok.1))
181 .collect::<Vec<_>>(),
182 ))
183 }
184}
185
/// A token recognized in the input: its kind, the matched slice of the
/// input, and where it was found.
pub struct Token<'i, I: Input + ?Sized, TK> {
    /// The kind of this token.
    pub kind: TK,

    /// The slice of the input matched by this token.
    pub value: &'i I,

    /// The location (span) of this token in the input.
    pub location: Location,
}
196
197impl<I: Input + ?Sized, TK: Copy> Clone for Token<'_, I, TK> {
198 fn clone(&self) -> Self {
199 Self {
200 kind: self.kind,
201 value: self.value,
202 location: self.location,
203 }
204 }
205}
206
207impl<I, TK> Debug for Token<'_, I, TK>
208where
209 I: Input + ?Sized,
210 I::Output: Debug,
211 TK: Debug,
212{
213 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
214 write!(
215 f,
216 "{:?}({:?} {:?})",
217 self.kind,
218 if self.value.len() > 50 {
219 format!(
220 "{:?}{}{:?}",
221 &self.value.slice(0..20),
222 "..<snip>..",
223 &self.value.slice(self.value.len() - 20..self.value.len())
224 )
225 } else {
226 format!("{:?}", self.value)
227 },
228 self.location
229 )
230 }
231}