1use std::fmt;
2
3use combine::easy::{Error, Errors, Info};
4use combine::error::StreamError;
5use combine::stream::ResetStream;
6use combine::{Positioned, StreamOnce};
7
8use crate::position::Pos;
9
/// Lexical category of a [`Token`] produced by the tokenizer.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Kind {
    /// Brackets, `...` and the single-character symbols `! $ : = @ | &`.
    Punctuator,
    /// An identifier: `_` or an ASCII letter followed by `_`, ASCII letters or digits.
    Name,
    /// A numeric literal without a fraction or exponent part.
    IntValue,
    /// A numeric literal with a fraction part, an exponent part, or both.
    FloatValue,
    /// A single-line string delimited by `"`.
    StringValue,
    /// A string delimited by `"""`.
    BlockString,
}
19
20#[derive(Debug, PartialEq, Eq, Clone, Copy)]
21pub struct Token<'a> {
22 pub kind: Kind,
23 pub value: &'a str,
24}
25
26#[derive(Debug, PartialEq)]
27pub struct TokenStream<'a> {
28 buf: &'a str,
29 position: Pos,
30 off: usize,
31 next_state: Option<(usize, Token<'a>, usize, Pos)>,
32 recursion_limit: usize,
33}
34
35impl TokenStream<'_> {
36 pub(crate) fn offset(&self) -> usize {
37 self.off
38 }
39}
40
41#[derive(Clone, Debug, PartialEq)]
42pub struct Checkpoint {
43 position: Pos,
44 off: usize,
45}
46
47impl<'a> StreamOnce for TokenStream<'a> {
48 type Token = Token<'a>;
49 type Range = Token<'a>;
50 type Position = Pos;
51 type Error = Errors<Token<'a>, Token<'a>, Pos>;
52
53 fn uncons(&mut self) -> Result<Self::Token, Error<Token<'a>, Token<'a>>> {
54 if let Some((at, tok, off, pos)) = self.next_state {
55 if at == self.off {
56 self.off = off;
57 self.position = pos;
58 return Ok(tok);
59 }
60 }
61 let old_pos = self.off;
62 let (kind, len) = self.take_token()?;
63 let value = &self.buf[self.off - len..self.off];
64 self.skip_whitespace();
65 let token = Token { kind, value };
66 self.next_state = Some((old_pos, token, self.off, self.position));
67 Ok(token)
68 }
69}
70
71impl<'a> Positioned for TokenStream<'a> {
72 fn position(&self) -> Self::Position {
73 self.position
74 }
75}
76
77impl<'a> ResetStream for TokenStream<'a> {
78 type Checkpoint = Checkpoint;
79 fn checkpoint(&self) -> Self::Checkpoint {
80 Checkpoint {
81 position: self.position,
82 off: self.off,
83 }
84 }
85 fn reset(&mut self, checkpoint: Checkpoint) -> Result<(), Self::Error> {
86 self.position = checkpoint.position;
87 self.off = checkpoint.off;
88 Ok(())
89 }
90}
91
/// Returns `true` when `value` is a well-formed integer literal.
///
/// Accepted shape: an optional leading `-`, followed by either a single `0`
/// or a run of ASCII digits that does not start with `0`.
///
/// Empty input, a lone `-`, leading zeros, and any non-digit character
/// (including non-ASCII text) yield `false`; the function never panics.
fn check_int(value: &str) -> bool {
    // `-` is a single byte, so slicing at 1 is always on a char boundary.
    let digits = if value.starts_with('-') {
        &value[1..]
    } else {
        value
    };
    let bytes = digits.as_bytes();
    match bytes.first() {
        // Nothing at all, or nothing after the sign.
        None => false,
        // A leading zero is only valid when it is the whole number.
        Some(b'0') => bytes.len() == 1,
        Some(_) => bytes.iter().all(|b| b.is_ascii_digit()),
    }
}
102
/// Returns `true` when `value` is a non-empty run of ASCII digits, i.e. a
/// valid fraction part of a float literal (the text after the `.`).
fn check_dec(value: &str) -> bool {
    if value.is_empty() {
        return false;
    }
    // Every byte of a multi-byte character is >= 0x80, so a byte-wise scan
    // rejects exactly the same inputs as a char-wise one.
    value.bytes().all(|b| b.is_ascii_digit())
}
106
/// Returns `true` when `value` is an acceptable exponent part of a float
/// literal (the text after `e`/`E`).
///
/// Accepted shapes:
/// * a `+` or `-` sign followed by one or more ASCII digits;
/// * an unsigned run of ASCII digits whose first digit is `1`..=`9`.
///
/// A bare sign with no digits and the empty string are rejected.
///
/// NOTE(review): an unsigned exponent that starts with `0` (as in `0e0`) is
/// still rejected. That matches the previous behavior, which the
/// `no_exp_sign_float` test pins, but it looks stricter than the GraphQL
/// grammar requires; confirm before relaxing.
fn check_exp(value: &str) -> bool {
    let mut chars = value.chars();
    let (signed, rest) = match chars.next() {
        Some('+') | Some('-') => (true, chars.as_str()),
        Some('1'..='9') => (false, chars.as_str()),
        _ => return false,
    };
    // A sign must be followed by at least one digit.
    if signed && rest.is_empty() {
        return false;
    }
    rest.bytes().all(|b| b.is_ascii_digit())
}
118
119fn check_float(value: &str, exponent: Option<usize>, real: Option<usize>) -> bool {
120 match (exponent, real) {
121 (Some(e), Some(r)) if e < r => false,
122 (Some(e), Some(r)) => {
123 check_int(&value[..r]) && check_dec(&value[r + 1..e]) && check_exp(&value[e + 1..])
124 }
125 (Some(e), None) => check_int(&value[..e]) && check_exp(&value[e + 1..]),
126 (None, Some(r)) => check_int(&value[..r]) && check_dec(&value[r + 1..]),
127 (None, None) => unreachable!(),
128 }
129}
130
131impl<'a> TokenStream<'a> {
132 pub fn new(s: &str) -> TokenStream {
133 Self::with_recursion_limit(s, 50)
134 }
135
136 pub(crate) fn with_recursion_limit(s: &str, recursion_limit: usize) -> TokenStream {
140 let mut me = TokenStream {
141 buf: s,
142 position: Pos { line: 1, column: 1 },
143 off: 0,
144 next_state: None,
145 recursion_limit,
146 };
147 me.skip_whitespace();
148 me
149 }
150
151 #[inline]
154 fn advance_token<T>(&mut self, kind: Kind, size: usize) -> Result<(Kind, usize), T> {
155 self.position.column += size;
156 self.off += size;
157 Ok((kind, size))
158 }
159
160 fn take_token(&mut self) -> Result<(Kind, usize), Error<Token<'a>, Token<'a>>> {
161 use self::Kind::*;
162 let mut iter = self.buf[self.off..].char_indices();
163 let cur_char = match iter.next() {
164 Some((_, x)) => x,
165 None => return Err(Error::end_of_input()),
166 };
167
168 match cur_char {
169 '(' | '[' | '{' => {
170 self.recursion_limit = self
172 .recursion_limit
173 .checked_sub(1)
174 .ok_or_else(|| Error::message_static_message("Recursion limit exceeded"))?;
175
176 self.advance_token(Punctuator, 1)
177 }
178 ')' | ']' | '}' => {
179 self.recursion_limit = self.recursion_limit.saturating_add(1);
190 self.advance_token(Punctuator, 1)
191 }
192 '!' | '$' | ':' | '=' | '@' | '|' | '&' => self.advance_token(Punctuator, 1),
193 '.' => {
194 if iter.as_str().starts_with("..") {
195 self.advance_token(Punctuator, 3)
196 } else {
197 Err(Error::Unexpected(Info::Owned(
198 format_args!(
199 "bare dot {:?} is not supported, \
200 only \"...\"",
201 cur_char
202 )
203 .to_string(),
204 )))
205 }
206 }
207 '_' | 'a'..='z' | 'A'..='Z' => {
208 for (idx, cur_char) in iter.by_ref() {
209 match cur_char {
210 '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' => continue,
211 _ => return self.advance_token(Name, idx),
212 }
213 }
214 let len = self.buf.len() - self.off;
215 self.position.column += len;
216 self.off += len;
217
218 Ok((Name, len))
219 }
220 '-' | '0'..='9' => {
221 let mut exponent = None;
222 let mut real = None;
223 let len = loop {
224 let (idx, cur_char) = match iter.next() {
225 Some(pair) => pair,
226 None => break self.buf.len() - self.off,
227 };
228 match cur_char {
229 ' ' | '\n' | '\r' | '\t' | ',' | '#' | '!' | '$' | ':' | '=' | '@'
231 | '|' | '&' | '(' | ')' | '[' | ']' | '{' | '}' => break idx,
232 '.' => real = Some(idx),
233 'e' | 'E' => exponent = Some(idx),
234 _ => {}
235 }
236 };
237
238 if exponent.is_some() || real.is_some() {
239 let value = &self.buf[self.off..][..len];
240 if !check_float(value, exponent, real) {
241 return Err(Error::Unexpected(Info::Owned(
242 format_args!("unsupported float {:?}", value).to_string(),
243 )));
244 }
245 self.position.column += len;
246 self.off += len;
247
248 Ok((FloatValue, len))
249 } else {
250 let value = &self.buf[self.off..][..len];
251 if !check_int(value) {
252 return Err(Error::Unexpected(Info::Owned(
253 format_args!("unsupported integer {:?}", value).to_string(),
254 )));
255 }
256 self.advance_token(IntValue, len)
257 }
258 }
259 '"' => {
260 if iter.as_str().starts_with("\"\"") {
261 let tail = &iter.as_str()[2..];
262 for (end_idx, _) in tail.match_indices("\"\"\"") {
263 if !tail[..end_idx].ends_with('\\') {
264 self.update_position(end_idx + 6);
265 return Ok((BlockString, end_idx + 6));
266 }
267 }
268
269 Err(Error::Unexpected(Info::Owned(
270 "unterminated block string value".to_string(),
271 )))
272 } else {
273 let mut nchars = 1;
274 let mut escaped = false;
275 for (idx, cur_char) in iter {
276 nchars += 1;
277 match cur_char {
278 '"' if escaped => {}
279 '"' => {
280 self.position.column += nchars;
281 self.off += idx + 1;
282 return Ok((StringValue, idx + 1));
283 }
284 '\n' => {
285 return Err(Error::Unexpected(Info::Owned(
286 "unterminated string value".to_string(),
287 )));
288 }
289
290 _ => {}
291 }
292
293 escaped = !escaped && cur_char == '\\';
295 }
296 Err(Error::Unexpected(Info::Owned(
297 "unterminated string value".to_string(),
298 )))
299 }
300 }
301 _ => Err(Error::Unexpected(Info::Owned(
302 format_args!("unexpected character {:?}", cur_char).to_string(),
303 ))),
304 }
305 }
306
307 fn skip_whitespace(&mut self) {
308 let mut iter = self.buf[self.off..].char_indices();
309 let idx = loop {
310 let (idx, cur_char) = match iter.next() {
311 Some(pair) => pair,
312 None => break self.buf.len() - self.off,
313 };
314 match cur_char {
315 '\u{feff}' | '\r' => continue,
316 '\t' => self.position.column += 8,
317 '\n' => {
318 self.position.column = 1;
319 self.position.line += 1;
320 }
321 ' ' | ',' => {
323 self.position.column += 1;
324 continue;
325 }
326 '#' => {
328 for (_, cur_char) in iter.by_ref() {
329 if cur_char == '\r' || cur_char == '\n' {
331 self.position.column = 1;
332 self.position.line += 1;
333 break;
334 }
335 }
336 continue;
337 }
338 _ => break idx,
339 }
340 };
341 self.off += idx;
342 }
343
344 fn update_position(&mut self, len: usize) {
345 let val = &self.buf[self.off..][..len];
346 self.off += len;
347 let lines = val.as_bytes().iter().filter(|&&x| x == b'\n').count();
348 self.position.line += lines;
349 if lines > 0 {
350 let line_offset = val.rfind('\n').unwrap() + 1;
351 let num = val[line_offset..].chars().count();
352 self.position.column = num + 1;
353 } else {
354 let num = val.chars().count();
355 self.position.column += num;
356 }
357 }
358}
359
360impl<'a> fmt::Display for Token<'a> {
361 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
362 write!(f, "{}[{:?}]", self.value, self.kind)
363 }
364}
365
#[cfg(test)]
mod test {
    use super::Kind::*;
    use super::{Kind, Token, TokenStream};
    use combine::easy::Error;

    use combine::{Positioned, StreamOnce};

    /// Tokenizes `input` to the end and maps every token through `pick`.
    ///
    /// Panics, reporting the stream position, on any error other than end of
    /// input.
    fn collect_tokens<'a, T>(input: &'a str, pick: impl Fn(Token<'a>) -> T) -> Vec<T> {
        let mut stream = TokenStream::new(input);
        let mut out = Vec::new();
        loop {
            match stream.uncons() {
                Ok(token) => out.push(pick(token)),
                Err(ref e) if e == &Error::end_of_input() => break,
                Err(e) => panic!("Parse error at {}: {}", stream.position(), e),
            }
        }
        out
    }

    /// Source text of every token in `s`.
    fn tok_str(s: &str) -> Vec<&str> {
        collect_tokens(s, |token| token.value)
    }

    /// Kind of every token in `s`.
    fn tok_typ(s: &str) -> Vec<Kind> {
        collect_tokens(s, |token| token.kind)
    }

    /// Declares one `#[should_panic]` test per `name => input` pair; each
    /// test passes when tokenizing the input panics.
    macro_rules! rejects {
        ($($name:ident => $input:expr,)+) => {
            $(
                #[test]
                #[should_panic]
                fn $name() {
                    tok_str($input);
                }
            )+
        };
    }

    #[test]
    fn comments_and_commas() {
        for &src in &["# hello { world }", "# x\n,,,", ", ,, ,,, # x"] {
            assert_eq!(tok_str(src), &[] as &[&str]);
        }
    }

    #[test]
    fn simple() {
        assert_eq!(tok_str("a { b }"), ["a", "{", "b", "}"]);
        assert_eq!(tok_typ("a { b }"), [Name, Punctuator, Name, Punctuator]);
    }

    #[test]
    fn query() {
        assert_eq!(
            tok_str(
                "query Query {
                    object { field }
                }"
            ),
            ["query", "Query", "{", "object", "{", "field", "}", "}"]
        );
    }

    #[test]
    fn fragment() {
        assert_eq!(tok_str("a { ...b }"), ["a", "{", "...", "b", "}"]);
    }

    #[test]
    fn int() {
        for &src in &["0", "-0", "-1", "-132", "132"] {
            assert_eq!(tok_str(src), [src]);
            assert_eq!(tok_typ(src), [IntValue]);
        }
        // A trailing comma or comment is not part of the number.
        assert_eq!(tok_str("0,"), ["0"]);
        assert_eq!(tok_str("0# x"), ["0"]);
        assert_eq!(
            tok_str("a(x: 10) { b }"),
            ["a", "(", "x", ":", "10", ")", "{", "b", "}"]
        );
        assert_eq!(
            tok_typ("a(x: 10) { b }"),
            [
                Name, Punctuator, Name, Punctuator, IntValue, Punctuator, Punctuator, Name,
                Punctuator
            ]
        );
    }

    rejects! {
        zero_int => "01",
        zero_int4 => "00001",
        minus_int => "-",
        minus_zero_int => "-01",
        minus_zero_int4 => "-00001",
        letters_int => "0bbc",
    }

    #[test]
    fn float() {
        let literals = [
            "0.0", "-0.0", "-1.0", "-1.023", "-132.0", "132.0", "0e+0", "0.0e+0", "-0e+0",
            "-1e+0", "-132e+0", "132e+0", "1.23e4",
        ];
        for &src in &literals {
            assert_eq!(tok_str(src), [src]);
            assert_eq!(tok_typ(src), [FloatValue]);
        }
        assert_eq!(
            tok_str("a(x: 10.0) { b }"),
            ["a", "(", "x", ":", "10.0", ")", "{", "b", "}"]
        );
        assert_eq!(
            tok_typ("a(x: 10.0) { b }"),
            [
                Name, Punctuator, Name, Punctuator, FloatValue, Punctuator, Punctuator, Name,
                Punctuator
            ]
        );
    }

    rejects! {
        no_int_float => ".0",
        no_int_float1 => ".1",
        zero_float => "01.0",
        zero_float4 => "00001.0",
        minus_float => "-.0",
        minus_zero_float => "-01.0",
        minus_zero_float4 => "-00001.0",
        letters_float => "0bbc.0",
        letters_float2 => "0.bbc",
        letters_float3 => "0.bbce0",
        // An unsigned exponent starting with `0` is rejected by the tokenizer.
        no_exp_sign_float => "0e0",
        unterminated_string => r#""hello\""#,
        extra_unterminated_string => r#""hello\\\""#,
    }

    #[test]
    fn string() {
        let literals = [
            r#""""#,
            r#""hello""#,
            r#""hello\\""#,
            r#""hello\\\\""#,
            r#""he\\llo""#,
            r#""my\"quote""#,
        ];
        for &src in &literals {
            assert_eq!(tok_str(src), [src]);
            assert_eq!(tok_typ(src), [StringValue]);
        }
    }

    #[test]
    fn block_string() {
        let literals = [
            r#""""""""#,
            r#""""hello""""#,
            r#""""my "quote" """"#,
            r#""""\"""quote" """"#,
        ];
        for &src in &literals {
            assert_eq!(tok_str(src), [src]);
            assert_eq!(tok_typ(src), [BlockString]);
        }
    }
}