/// Errors that can occur while lexing a single token.
#[derive(Debug, PartialEq, Copy, Clone)]
pub enum TokenError {
    InvalidNumber,

    MissingQuoteEnd,

    InvalidUnquotedString,

    InvalidEscape,
}

#[derive(Debug, PartialEq, Clone)]
pub enum Token<'a> {
    /// A lexing error, carrying the offending slice and the error kind.
    Error((&'a str, TokenError)),
    Whitespace(&'a str),
    Comment(&'a str),

    /// An unquoted string, borrowed from the input.
    Str(&'a str),
    /// A quoted string with escapes resolved, hence owned.
    QStr(String),

    OpenBracket,
    CloseBracket,
    OpenParens,
    CloseParens,
    OpenCurly,
    CloseCurly,

    UInt(u64),
    SInt(i64),
    Float(f64),
}

/// Splits `s` at the first character that does not satisfy `cond`,
/// returning the matching prefix and the remainder.
#[inline]
fn scan<F: Fn(char) -> bool>(s: &str, cond: F) -> (&str, &str) {
    match s.find(|c: char| !cond(c)) {
        None => (s, ""),
        Some(pos) => s.split_at(pos),
    }
}
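
// Illustrative sanity check for `scan`, added alongside the existing tests:
// the prefix is the longest run of characters satisfying the predicate.
#[test]
fn test_scan() {
    assert_eq!(("abc", " def"), scan("abc def", |c| !c.is_whitespace()));
    // If every character matches, the remainder is empty.
    assert_eq!(("abc", ""), scan("abc", |c| !c.is_whitespace()));
}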

/// Returns true for characters that terminate an unquoted token:
/// whitespace, brackets, parentheses, curly braces, and the comment marker.
#[inline]
pub fn is_token_delim(c: char) -> bool {
    c.is_whitespace()
        || c == '('
        || c == ')'
        || c == '['
        || c == ']'
        || c == '{'
        || c == '}'
        || c == '#'
}
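
// Illustrative check for `is_token_delim` (not part of the original suite).
#[test]
fn test_is_token_delim() {
    assert!(is_token_delim(' '));
    assert!(is_token_delim('('));
    assert!(is_token_delim('#'));
    assert!(!is_token_delim('a'));
    assert!(!is_token_delim('+'));
}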

/// An unquoted string may not contain backslashes or double quotes;
/// those require the quoted (`QStr`) form.
fn is_valid_unquoted_string(s: &str) -> bool {
    !(s.contains('\\') || s.contains('"'))
}
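
// Illustrative check for `is_valid_unquoted_string`.
#[test]
fn test_is_valid_unquoted_string() {
    assert!(is_valid_unquoted_string("abc-123"));
    assert!(!is_valid_unquoted_string("a\"b"));
    assert!(!is_valid_unquoted_string("a\\b"));
}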

/// Lexes a single token from the front of `s`, returning the token and the
/// remaining input, or `None` if the input is exhausted. Note that for
/// quoted-string errors (`MissingQuoteEnd`, `InvalidEscape`) the remainder
/// returned is the original `s`, so the caller does not advance.
fn next_token<'a>(s: &'a str) -> Option<(Token<'a>, &'a str)> {
    let mut chars = s.chars();
    match chars.next() {
        None => None,
        Some(c) => {
            match c {
                '(' => Some((Token::OpenParens, chars.as_str())),
                ')' => Some((Token::CloseParens, chars.as_str())),

                '[' => Some((Token::OpenBracket, chars.as_str())),
                ']' => Some((Token::CloseBracket, chars.as_str())),

                '{' => Some((Token::OpenCurly, chars.as_str())),
                '}' => Some((Token::CloseCurly, chars.as_str())),

                // A comment runs from `#` to the end of the line; the
                // newline itself is not part of the comment.
                '#' => {
                    let (comment, rest) = scan(chars.as_str(), |ch| ch != '\n');
                    Some((Token::Comment(comment), rest))
                }

                // Quoted string: unescape into an owned buffer. Only `\\`
                // and `\"` are recognized escape sequences.
                '"' => {
                    let mut unescaped = String::new();

                    loop {
                        match chars.next() {
                            None => {
                                // Input ended before the closing quote.
                                return Some((
                                    Token::Error((chars.as_str(), TokenError::MissingQuoteEnd)),
                                    s,
                                ));
                            }
                            Some(ch) => {
                                match ch {
                                    '"' => {
                                        break;
                                    }
                                    '\\' => {
                                        match chars.next() {
                                            Some('\\') => {
                                                unescaped.push('\\');
                                            }
                                            Some('"') => {
                                                unescaped.push('"');
                                            }
                                            _ => {
                                                return Some((
                                                    Token::Error((
                                                        chars.as_str(),
                                                        TokenError::InvalidEscape,
                                                    )),
                                                    s,
                                                ));
                                            }
                                        }
                                    }
                                    _ => {
                                        unescaped.push(ch);
                                    }
                                }
                            }
                        }
                    }

                    Some((Token::QStr(unescaped), chars.as_str()))
                }

                c if char::is_whitespace(c) => {
                    let (ws, rest) = scan(s, char::is_whitespace);
                    Some((Token::Whitespace(ws), rest))
                }

                // A leading sign starts either a signed number or an
                // unquoted string such as `+a`.
                '+' | '-' => {
                    let (string, rest) = scan(s, |ch| !is_token_delim(ch));
                    assert!(!string.is_empty());

                    match chars.next() {
                        Some(ch) if char::is_digit(ch, 10) => {
                            if let Ok(i) = string.parse::<i64>() {
                                Some((Token::SInt(i), rest))
                            } else if let Ok(f) = string.parse::<f64>() {
                                Some((Token::Float(f), rest))
                            } else {
                                Some((Token::Error((string, TokenError::InvalidNumber)), rest))
                            }
                        }
                        _ => {
                            if is_valid_unquoted_string(string) {
                                Some((Token::Str(string), rest))
                            } else {
                                Some((
                                    Token::Error((string, TokenError::InvalidUnquotedString)),
                                    rest,
                                ))
                            }
                        }
                    }
                }

                // Unsigned integer, or a float if integer parsing fails.
                '0'..='9' => {
                    let (string, rest) = scan(s, |ch| !is_token_delim(ch));
                    assert!(!string.is_empty());

                    if let Ok(i) = string.parse::<u64>() {
                        Some((Token::UInt(i), rest))
                    } else if let Ok(f) = string.parse::<f64>() {
                        Some((Token::Float(f), rest))
                    } else {
                        Some((Token::Error((string, TokenError::InvalidNumber)), rest))
                    }
                }

                // Anything else starts an unquoted string.
                _ => {
                    let (string, rest) = scan(s, |ch| !is_token_delim(ch));
                    assert!(!string.is_empty());

                    if is_valid_unquoted_string(string) {
                        Some((Token::Str(string), rest))
                    } else {
                        Some((
                            Token::Error((string, TokenError::InvalidUnquotedString)),
                            rest,
                        ))
                    }
                }
            }
        }
    }
}

/// An iterator over the tokens of a string. With `ignore_ws` set,
/// whitespace and comment tokens are skipped.
pub struct Tokenizer<'a> {
    current: &'a str,
    ignore_ws: bool,
}

impl<'a> Tokenizer<'a> {
    pub fn new(s: &'a str, ignore_ws: bool) -> Tokenizer<'a> {
        Tokenizer {
            current: s,
            ignore_ws,
        }
    }

    pub fn with_curly_around(self) -> CurlyAroundTokenizer<'a> {
        CurlyAroundTokenizer::new(self)
    }
}

impl<'a> Iterator for Tokenizer<'a> {
    type Item = Token<'a>;

    // Note: `next_token` does not consume input on quoted-string errors, so
    // this iterator yields the same `Token::Error` forever in that case;
    // callers should stop at the first `Error`.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            match next_token(self.current) {
                Some((tok, rest)) => {
                    self.current = rest;
                    if self.ignore_ws {
                        match tok {
                            Token::Whitespace(_) | Token::Comment(_) => continue,
                            _ => {}
                        }
                    }
                    return Some(tok);
                }
                None => {
                    return None;
                }
            }
        }
    }
}
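
// Illustrative addition: pins down the behavior noted above, namely that the
// iterator does not advance past a quoted-string error, so the same error
// token repeats on every call.
#[test]
fn test_tokenizer_error_repeats() {
    let mut t = Tokenizer::new("\"abc", true);
    assert_eq!(
        Some(Token::Error(("", TokenError::MissingQuoteEnd))),
        t.next()
    );
    assert_eq!(
        Some(Token::Error(("", TokenError::MissingQuoteEnd))),
        t.next()
    );
}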

/// Tracks progress through the synthetic `{ ... }` wrapper.
enum State {
    Begin,
    Inner,
    End,
}

/// Wraps an inner `Tokenizer`, emitting an `OpenCurly` before its tokens
/// and a `CloseCurly` after them.
pub struct CurlyAroundTokenizer<'a> {
    inner: Tokenizer<'a>,
    state: State,
}

impl<'a> CurlyAroundTokenizer<'a> {
    pub fn new(inner: Tokenizer<'a>) -> CurlyAroundTokenizer<'a> {
        CurlyAroundTokenizer {
            inner,
            state: State::Begin,
        }
    }
}

impl<'a> Iterator for CurlyAroundTokenizer<'a> {
    type Item = Token<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.state {
            State::Begin => {
                self.state = State::Inner;
                Some(Token::OpenCurly)
            }
            State::Inner => {
                if let Some(tok) = self.inner.next() {
                    return Some(tok);
                }
                self.state = State::End;
                Some(Token::CloseCurly)
            }
            State::End => None,
        }
    }
}

#[test]
fn test_tokenizer_whitespace() {
    let t = Tokenizer::new(" (abc 123)", false);
    let tokens: Vec<_> = t.collect();
    assert_eq!(
        vec![
            Token::Whitespace(" "),
            Token::OpenParens,
            Token::Str("abc"),
            Token::Whitespace(" "),
            Token::UInt(123),
            Token::CloseParens
        ],
        tokens
    );
}

#[test]
fn test_tokenizer_comment() {
    let t = Tokenizer::new(" (abc#comment\n 123)", false);
    let tokens: Vec<_> = t.collect();
    assert_eq!(
        vec![
            Token::Whitespace(" "),
            Token::OpenParens,
            Token::Str("abc"),
            Token::Comment("comment"),
            Token::Whitespace("\n "),
            Token::UInt(123),
            Token::CloseParens
        ],
        tokens
    );
}

#[test]
fn test_tokenizer_curly_around() {
    let t = CurlyAroundTokenizer::new(Tokenizer::new(" (abc 123)", true));
    let tokens: Vec<_> = t.collect();
    assert_eq!(
        vec![
            Token::OpenCurly,
            Token::OpenParens,
            Token::Str("abc"),
            Token::UInt(123),
            Token::CloseParens,
            Token::CloseCurly
        ],
        tokens
    );
}
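
// Illustrative addition covering the `with_curly_around` builder, which the
// original tests exercise only via `CurlyAroundTokenizer::new`.
#[test]
fn test_with_curly_around() {
    let tokens: Vec<_> = Tokenizer::new("abc", true).with_curly_around().collect();
    assert_eq!(
        vec![Token::OpenCurly, Token::Str("abc"), Token::CloseCurly],
        tokens
    );

    // Empty input still produces the surrounding braces.
    let tokens: Vec<_> = Tokenizer::new("", true).with_curly_around().collect();
    assert_eq!(vec![Token::OpenCurly, Token::CloseCurly], tokens);
}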

#[test]
fn test_tokenizer_no_whitespace() {
    let t = Tokenizer::new(" (abc 123)", true);
    let tokens: Vec<_> = t.collect();
    assert_eq!(
        vec![
            Token::OpenParens,
            Token::Str("abc"),
            Token::UInt(123),
            Token::CloseParens
        ],
        tokens
    );
}

#[test]
fn test_token() {
    assert_eq!(Some((Token::Whitespace(" "), "abc")), next_token(" abc"));
    assert_eq!(Some((Token::Str("abc"), "")), next_token("abc"));
    assert_eq!(Some((Token::Str("abc"), "(")), next_token("abc("));

    assert_eq!(Some((Token::OpenParens, ")")), next_token("()"));

    assert_eq!(Some((Token::UInt(12345), "")), next_token("12345"));
    assert_eq!(Some((Token::UInt(12345), " ")), next_token("12345 "));
    assert_eq!(Some((Token::SInt(12345), " ")), next_token("+12345 "));
    assert_eq!(Some((Token::SInt(-12345), " ")), next_token("-12345 "));
    assert_eq!(Some((Token::Str("-a"), " ")), next_token("-a "));
    assert_eq!(Some((Token::Str("+a"), " ")), next_token("+a "));
    assert_eq!(Some((Token::Str("+a"), "(")), next_token("+a("));

    assert_eq!(
        Some((Token::Error(("12345+", TokenError::InvalidNumber)), "")),
        next_token("12345+")
    );
    assert_eq!(Some((Token::UInt(12345), " +")), next_token("12345 +"));
    assert_eq!(Some((Token::Float(12345.123), "")), next_token("12345.123"));
    assert_eq!(
        Some((Token::Float(12345.123), "(")),
        next_token("12345.123(")
    );

    assert_eq!(
        Some((
            Token::Error(("abc\\", TokenError::InvalidUnquotedString)),
            " test"
        )),
        next_token("abc\\ test")
    );
    assert_eq!(
        Some((
            Token::Error(("abc\"", TokenError::InvalidUnquotedString)),
            " test"
        )),
        next_token("abc\" test")
    );

    assert_eq!(
        Some((Token::QStr("".to_string()), "(")),
        next_token("\"\"(")
    );
    assert_eq!(
        Some((Token::QStr("abc".to_string()), "(")),
        next_token("\"abc\"(")
    );
    assert_eq!(
        Some((Token::QStr("a\"b".to_string()), "(")),
        next_token("\"a\\\"b\"(")
    );
    assert_eq!(
        Some((Token::QStr("a\\b".to_string()), "(")),
        next_token("\"a\\\\b\"(")
    );

    assert_eq!(
        Some((Token::Error(("", TokenError::MissingQuoteEnd)), "\"abc ")),
        next_token("\"abc ")
    );

    assert_eq!(
        Some((Token::Error((" ", TokenError::InvalidEscape)), "\"abc\\n ")),
        next_token("\"abc\\n ")
    );
}