1use peekmore::{PeekMore, PeekMoreIterator};
7use std::iter::Peekable;
8
9use crate::model::token::*;
10
11pub fn lex(input: &'_ str) -> impl Iterator<Item = Token> + '_ {
16 let lexer = Lexer::new(input);
17 let lexer = TextJoiner::new(lexer);
18 SpaceCutter::new(lexer)
19}
20
21struct Lexer<'a> {
22 input: &'a str,
23 cursor: usize,
24}
25
26impl<'a> Lexer<'a> {
27 fn new(input: &'a str) -> Self {
28 Self { input, cursor: 0 }
29 }
30}
31
32impl<'a> Iterator for Lexer<'a> {
33 type Item = Token;
34
35 fn next(&mut self) -> Option<Self::Item> {
36 let mut chars = self.input.char_indices().skip(self.cursor).peekable();
37
38 let (kind, start, len) = if let Some((index, c)) = chars.next() {
39 let len = c.len_utf8();
40
41 let (kind, len) = match c {
42 '#' => (TokenKind::Pound, len),
43 '*' => (TokenKind::Star, len),
44 ':' => (TokenKind::Colon, len),
45 '`' => (TokenKind::Backquote, len),
46 '>' => (TokenKind::Gt, len),
47 '-' => (TokenKind::Hyphen, len),
48 '|' => (TokenKind::VerticalBar, len),
49 '.' => (TokenKind::Dot, len),
50 '(' => (TokenKind::OpenParen, len),
51 ')' => (TokenKind::CloseParen, len),
52 '{' => (TokenKind::OpenBrace, len),
53 '}' => (TokenKind::CloseBrace, len),
54 '[' => (TokenKind::OpenBracket, len),
55 ']' => (TokenKind::CloseBracket, len),
56 ' ' => (TokenKind::Space, len),
57 '\t' => (TokenKind::Tab, len),
58 '\n' => (TokenKind::Break, len),
59 '\r' => {
60 if let Some((_, c2)) = chars.next_if(|(_, c2)| c2 == &'\n') {
61 (TokenKind::Break, len + c2.len_utf8())
62 } else {
63 (TokenKind::Text, len)
64 }
65 }
66 '\\' => {
67 if let Some((_, c2)) = chars.next_if(|(_, c2)| {
68 matches!(
69 c2,
70 '#' | '*'
71 | ':'
72 | '`'
73 | '>'
74 | '-'
75 | '|'
76 | '.'
77 | '('
78 | ')'
79 | '{'
80 | '}'
81 | '['
82 | ']'
83 | '\\'
84 )
85 }) {
86 self.cursor += len + c2.len_utf8();
87 return Some(Token {
88 kind: TokenKind::Text,
89 start: index + len,
90 len: c2.len_utf8(),
91 });
92 } else {
93 (TokenKind::Text, len)
94 }
95 }
96 _ => (TokenKind::Text, len),
97 };
98
99 (kind, index, len)
100 } else {
101 return None;
102 };
103
104 self.cursor += len;
105
106 Some(Token { kind, start, len })
107 }
108}
109
110struct TextJoiner<T: Iterator<Item = Token>> {
111 iter: Peekable<T>,
112}
113
114impl<T: Iterator<Item = Token>> TextJoiner<T> {
115 fn new(iter: T) -> Self {
116 Self {
117 iter: iter.peekable(),
118 }
119 }
120}
121
122impl<T: Iterator<Item = Token>> Iterator for TextJoiner<T> {
123 type Item = Token;
124
125 fn next(&mut self) -> Option<Self::Item> {
126 let mut token = self.iter.next()?;
127
128 if token.kind == TokenKind::Text {
129 while let Some(next) = self.iter.peek() {
130 if next.kind == TokenKind::Text {
131 token.len += next.len;
132 self.iter.next();
133 } else {
134 break;
135 }
136 }
137 }
138
139 Some(token)
140 }
141}
142
143struct SpaceCutter<T: Iterator<Item = Token>> {
144 iter: PeekMoreIterator<T>,
145}
146
147impl<T: Iterator<Item = Token>> SpaceCutter<T> {
148 fn new(iter: T) -> Self {
149 Self {
150 iter: iter.peekmore(),
151 }
152 }
153}
154
155impl<T: Iterator<Item = Token>> Iterator for SpaceCutter<T> {
156 type Item = Token;
157
158 fn next(&mut self) -> Option<Self::Item> {
159 let token = self.iter.next()?;
160
161 use TokenKind::*;
162
163 if token.kind == Break
164 && self.iter.peek().is_some()
165 && self.iter.peek().unwrap().kind != Break
166 {
167 for n in 0.. {
168 if let Some(nth) = self.iter.peek_nth(n) {
169 match nth.kind {
170 Space | Tab => continue,
171 Break => {
172 self.iter.nth(n - 1).unwrap();
173 break;
174 }
175 _ => break,
176 }
177 } else {
178 self.iter.nth(n - 1).unwrap();
179 break;
180 }
181 }
182 }
183
184 Some(token)
185 }
186}
187
#[cfg(test)]
mod tests {
    use super::*;

    /// Pulls one token from `tokens` per entry of `expected` and checks that
    /// the kinds match in order. Does not check that the stream ends.
    fn assert_kinds(tokens: &mut impl Iterator<Item = Token>, expected: &[TokenKind]) {
        for (position, kind) in expected.iter().enumerate() {
            let token = tokens
                .next()
                .unwrap_or_else(|| panic!("token stream ended at position {}", position));
            assert_eq!(
                &token.kind, kind,
                "unexpected token kind at position {}",
                position
            );
        }
    }

    /// The raw lexer emits one token per character, folds `\r\n` into a single
    /// break, treats a lone `\r` as text and unescapes `\#`.
    #[test]
    fn test_lexer() {
        let mut lexer = Lexer::new("## Hello\n");
        assert_kinds(
            &mut lexer,
            &[
                TokenKind::Pound,
                TokenKind::Pound,
                TokenKind::Space,
                TokenKind::Text,
                TokenKind::Text,
                TokenKind::Text,
                TokenKind::Text,
                TokenKind::Text,
                TokenKind::Break,
            ],
        );
        assert_eq!(lexer.next(), None);

        let mut lexer = Lexer::new("\r\n");
        assert_kinds(&mut lexer, &[TokenKind::Break]);
        assert_eq!(lexer.next(), None);

        let mut lexer = Lexer::new("\r");
        assert_kinds(&mut lexer, &[TokenKind::Text]);
        assert_eq!(lexer.next(), None);

        let mut lexer = Lexer::new(r"\# Q");
        // The escaped `#` is reported as text; its span excludes the backslash.
        assert_eq!(
            lexer.next().unwrap(),
            Token {
                kind: TokenKind::Text,
                start: 1,
                len: 1
            }
        );
        assert_kinds(&mut lexer, &[TokenKind::Space, TokenKind::Text]);
        assert_eq!(lexer.next(), None);
    }

    /// Consecutive text characters are merged into a single token spanning the
    /// whole word.
    #[test]
    fn test_text_joiner() {
        let mut lexer = TextJoiner::new(Lexer::new("## Hello Q\n"));

        assert_kinds(
            &mut lexer,
            &[TokenKind::Pound, TokenKind::Pound, TokenKind::Space],
        );
        assert_eq!(
            lexer.next().unwrap(),
            Token {
                kind: TokenKind::Text,
                start: 3,
                len: 5
            }
        );
        assert_kinds(&mut lexer, &[TokenKind::Space]);
        assert_eq!(
            lexer.next().unwrap(),
            Token {
                kind: TokenKind::Text,
                start: 9,
                len: 1
            }
        );
        assert_kinds(&mut lexer, &[TokenKind::Break]);
        assert_eq!(lexer.next(), None);
    }

    /// The space on the whitespace-only middle line is removed, leaving two
    /// consecutive breaks.
    #[test]
    fn test_space_cutter() {
        let mut lexer = SpaceCutter::new(Lexer::new("ABC\n \nDEF"));

        assert_kinds(
            &mut lexer,
            &[
                TokenKind::Text,
                TokenKind::Text,
                TokenKind::Text,
                TokenKind::Break,
                TokenKind::Break,
                TokenKind::Text,
                TokenKind::Text,
                TokenKind::Text,
            ],
        );
        assert_eq!(lexer.next(), None);
    }
}