1use std::fmt;
2
3use combine::easy::{Error, Errors};
4use combine::error::StreamError;
5use combine::stream::Resetable;
6use combine::{Positioned, StreamOnce};
7
8use crate::position::Pos;
9
10#[derive(Debug, PartialEq, Eq, Clone, Copy)]
11pub enum Kind {
12 Punctuator,
13 Name,
14 IntValue,
15 FloatValue,
16 StringValue,
17 BlockString,
18}
19
20#[derive(Debug, PartialEq, Eq, Clone, Copy)]
21pub struct Token<'a> {
22 pub kind: Kind,
23 pub value: &'a str,
24}
25
26#[derive(Debug, PartialEq)]
27pub struct TokenStream<'a> {
28 buf: &'a str,
29 position: Pos,
30 off: usize,
31 next_state: Option<(usize, Token<'a>, usize, Pos)>,
32 recursion_limit: usize,
33}
34
35impl TokenStream<'_> {
36 pub(crate) fn offset(&self) -> usize {
37 self.off
38 }
39}
40
41#[derive(Clone, Debug, PartialEq)]
42pub struct Checkpoint {
43 position: Pos,
44 off: usize,
45}
46
47impl<'a> StreamOnce for TokenStream<'a> {
48 type Item = Token<'a>;
49 type Range = Token<'a>;
50 type Position = Pos;
51 type Error = Errors<Token<'a>, Token<'a>, Pos>;
52
53 fn uncons(&mut self) -> Result<Self::Item, Error<Token<'a>, Token<'a>>> {
54 if let Some((at, tok, off, pos)) = self.next_state {
55 if at == self.off {
56 self.off = off;
57 self.position = pos;
58 return Ok(tok);
59 }
60 }
61 let old_pos = self.off;
62 let (kind, len) = self.take_token()?;
63 let value = &self.buf[self.off - len..self.off];
64 self.skip_whitespace();
65 let token = Token { kind, value };
66 self.next_state = Some((old_pos, token, self.off, self.position));
67 Ok(token)
68 }
69}
70
71impl<'a> Positioned for TokenStream<'a> {
72 fn position(&self) -> Self::Position {
73 self.position
74 }
75}
76
77impl<'a> Resetable for TokenStream<'a> {
78 type Checkpoint = Checkpoint;
79 fn checkpoint(&self) -> Self::Checkpoint {
80 Checkpoint {
81 position: self.position,
82 off: self.off,
83 }
84 }
85 fn reset(&mut self, checkpoint: Checkpoint) {
86 self.position = checkpoint.position;
87 self.off = checkpoint.off;
88 }
89}
90
91fn check_int(value: &str) -> bool {
94 value == "0"
95 || value == "-0"
96 || (!value.starts_with('0')
97 && value != "-"
98 && !value.starts_with("-0")
99 && value[1..].chars().all(|x| x >= '0' && x <= '9'))
100}
101
102fn check_dec(value: &str) -> bool {
103 !value.is_empty() && value.chars().all(|x| x >= '0' && x <= '9')
104}
105
106fn check_exp(value: &str) -> bool {
107 if value.is_empty() {
108 return false;
109 }
110 let first = value.chars().next().unwrap();
111 if first != '-' && first != '+' && (first <= '0' || first >= '9') {
112 return false;
113 }
114
115 value[1..].chars().all(|x| x >= '0' && x <= '9')
116}
117
118fn check_float(value: &str, exponent: Option<usize>, real: Option<usize>) -> bool {
119 match (exponent, real) {
120 (Some(e), Some(r)) if e < r => false,
121 (Some(e), Some(r)) => {
122 check_int(&value[..r]) && check_dec(&value[r + 1..e]) && check_exp(&value[e + 1..])
123 }
124 (Some(e), None) => check_int(&value[..e]) && check_exp(&value[e + 1..]),
125 (None, Some(r)) => check_int(&value[..r]) && check_dec(&value[r + 1..]),
126 (None, None) => unreachable!(),
127 }
128}
129
130impl<'a> TokenStream<'a> {
131 pub fn new(s: &str) -> TokenStream {
132 Self::with_recursion_limit(s, 50)
133 }
134
135 pub(crate) fn with_recursion_limit(s: &str, recursion_limit: usize) -> TokenStream {
139 let mut me = TokenStream {
140 buf: s,
141 position: Pos { line: 1, column: 1 },
142 off: 0,
143 next_state: None,
144 recursion_limit,
145 };
146 me.skip_whitespace();
147 me
148 }
149
150 #[inline]
153 fn advance_token<T>(&mut self, kind: Kind, size: usize) -> Result<(Kind, usize), T> {
154 self.position.column += size;
155 self.off += size;
156 Ok((kind, size))
157 }
158
159 fn take_token(&mut self) -> Result<(Kind, usize), Error<Token<'a>, Token<'a>>> {
160 use self::Kind::*;
161 let mut iter = self.buf[self.off..].char_indices();
162 let cur_char = match iter.next() {
163 Some((_, x)) => x,
164 None => return Err(Error::end_of_input()),
165 };
166
167 match cur_char {
168 '(' | '[' | '{' => {
169 self.recursion_limit = self
171 .recursion_limit
172 .checked_sub(1)
173 .ok_or_else(|| Error::message_static_message("Recursion limit exceeded"))?;
174
175 self.advance_token(Punctuator, 1)
176 }
177 ')' | ']' | '}' => {
178 self.recursion_limit = self.recursion_limit.saturating_add(1);
189 self.advance_token(Punctuator, 1)
190 }
191 '!' | '$' | ':' | '=' | '@' | '|' | '&' => self.advance_token(Punctuator, 1),
192 '.' => {
193 if iter.as_str().starts_with("..") {
194 self.advance_token(Punctuator, 3)
195 } else {
196 Err(Error::unexpected_message(format_args!(
197 "bare dot {:?} is not supported, \
198 only \"...\"",
199 cur_char
200 )))
201 }
202 }
203 '_' | 'a'..='z' | 'A'..='Z' => {
204 while let Some((idx, cur_char)) = iter.next() {
205 match cur_char {
206 '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' => continue,
207 _ => return self.advance_token(Name, idx),
208 }
209 }
210 let len = self.buf.len() - self.off;
211 self.position.column += len;
212 self.off += len;
213
214 Ok((Name, len))
215 }
216 '-' | '0'..='9' => {
217 let mut exponent = None;
218 let mut real = None;
219 let len = loop {
220 let (idx, cur_char) = match iter.next() {
221 Some(pair) => pair,
222 None => break self.buf.len() - self.off,
223 };
224 match cur_char {
225 ' ' | '\n' | '\r' | '\t' | ',' | '#' | '!' | '$' | ':' | '=' | '@'
227 | '|' | '&' | '(' | ')' | '[' | ']' | '{' | '}' => break idx,
228 '.' => real = Some(idx),
229 'e' | 'E' => exponent = Some(idx),
230 _ => {}
231 }
232 };
233
234 if exponent.is_some() || real.is_some() {
235 let value = &self.buf[self.off..][..len];
236 if !check_float(value, exponent, real) {
237 return Err(Error::unexpected_message(format_args!(
238 "unsupported float {:?}",
239 value
240 )));
241 }
242 self.position.column += len;
243 self.off += len;
244
245 Ok((FloatValue, len))
246 } else {
247 let value = &self.buf[self.off..][..len];
248 if !check_int(value) {
249 return Err(Error::unexpected_message(format_args!(
250 "unsupported integer {:?}",
251 value
252 )));
253 }
254 self.advance_token(IntValue, len)
255 }
256 }
257 '"' => {
258 if iter.as_str().starts_with("\"\"") {
259 let tail = &iter.as_str()[2..];
260 for (end_idx, _) in tail.match_indices("\"\"\"") {
261 if !tail[..end_idx].ends_with('\\') {
262 self.update_position(end_idx + 6);
263 return Ok((BlockString, end_idx + 6));
264 }
265 }
266
267 Err(Error::unexpected_message("unterminated block string value"))
268 } else {
269 let mut nchars = 1;
270 let mut escaped = false;
271 for (idx, cur_char) in iter {
272 nchars += 1;
273 match cur_char {
274 '"' if escaped => {}
275 '"' => {
276 self.position.column += nchars;
277 self.off += idx + 1;
278 return Ok((StringValue, idx + 1));
279 }
280 '\n' => {
281 return Err(Error::unexpected_message("unterminated string value"));
282 }
283
284 _ => {}
285 }
286
287 escaped = !escaped && cur_char == '\\';
289 }
290 Err(Error::unexpected_message("unterminated string value"))
291 }
292 }
293 _ => Err(Error::unexpected_message(format_args!(
294 "unexpected character {:?}",
295 cur_char
296 ))),
297 }
298 }
299
300 fn skip_whitespace(&mut self) {
301 let mut iter = self.buf[self.off..].char_indices();
302 let idx = loop {
303 let (idx, cur_char) = match iter.next() {
304 Some(pair) => pair,
305 None => break self.buf.len() - self.off,
306 };
307 match cur_char {
308 '\u{feff}' | '\r' => continue,
309 '\t' => self.position.column += 8,
310 '\n' => {
311 self.position.column = 1;
312 self.position.line += 1;
313 }
314 ' ' | ',' => {
316 self.position.column += 1;
317 continue;
318 }
319 '#' => {
321 while let Some((_, cur_char)) = iter.next() {
322 if cur_char == '\r' || cur_char == '\n' {
324 self.position.column = 1;
325 self.position.line += 1;
326 break;
327 }
328 }
329 continue;
330 }
331 _ => break idx,
332 }
333 };
334 self.off += idx;
335 }
336
337 fn update_position(&mut self, len: usize) {
338 let val = &self.buf[self.off..][..len];
339 self.off += len;
340 let lines = val.as_bytes().iter().filter(|&&x| x == b'\n').count();
341 self.position.line += lines;
342 if lines > 0 {
343 let line_offset = val.rfind('\n').unwrap() + 1;
344 let num = val[line_offset..].chars().count();
345 self.position.column = num + 1;
346 } else {
347 let num = val.chars().count();
348 self.position.column += num;
349 }
350 }
351}
352
353impl<'a> fmt::Display for Token<'a> {
354 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
355 write!(f, "{}[{:?}]", self.value, self.kind)
356 }
357}
358
359#[cfg(test)]
360mod test {
361 use super::Kind::*;
362 use super::{Kind, TokenStream};
363 use combine::easy::Error;
364
365 use combine::{Positioned, StreamOnce};
366
367 fn tok_str(s: &str) -> Vec<&str> {
368 let mut r = Vec::new();
369 let mut s = TokenStream::new(s);
370 loop {
371 match s.uncons() {
372 Ok(x) => r.push(x.value),
373 Err(ref e) if e == &Error::end_of_input() => break,
374 Err(e) => panic!("Parse error at {}: {}", s.position(), e),
375 }
376 }
377 return r;
378 }
379 fn tok_typ(s: &str) -> Vec<Kind> {
380 let mut r = Vec::new();
381 let mut s = TokenStream::new(s);
382 loop {
383 match s.uncons() {
384 Ok(x) => r.push(x.kind),
385 Err(ref e) if e == &Error::end_of_input() => break,
386 Err(e) => panic!("Parse error at {}: {}", s.position(), e),
387 }
388 }
389 return r;
390 }
391
392 #[test]
393 fn comments_and_commas() {
394 assert_eq!(tok_str("# hello { world }"), &[] as &[&str]);
395 assert_eq!(tok_str("# x\n,,,"), &[] as &[&str]);
396 assert_eq!(tok_str(", ,, ,,, # x"), &[] as &[&str]);
397 }
398
399 #[test]
400 fn simple() {
401 assert_eq!(tok_str("a { b }"), ["a", "{", "b", "}"]);
402 assert_eq!(tok_typ("a { b }"), [Name, Punctuator, Name, Punctuator]);
403 }
404
405 #[test]
406 fn query() {
407 assert_eq!(
408 tok_str(
409 "query Query {
410 object { field }
411 }"
412 ),
413 ["query", "Query", "{", "object", "{", "field", "}", "}"]
414 );
415 }
416
417 #[test]
418 fn fragment() {
419 assert_eq!(tok_str("a { ...b }"), ["a", "{", "...", "b", "}"]);
420 }
421
422 #[test]
423 fn int() {
424 assert_eq!(tok_str("0"), ["0"]);
425 assert_eq!(tok_str("0,"), ["0"]);
426 assert_eq!(tok_str("0# x"), ["0"]);
427 assert_eq!(tok_typ("0"), [IntValue]);
428 assert_eq!(tok_str("-0"), ["-0"]);
429 assert_eq!(tok_typ("-0"), [IntValue]);
430 assert_eq!(tok_str("-1"), ["-1"]);
431 assert_eq!(tok_typ("-1"), [IntValue]);
432 assert_eq!(tok_str("-132"), ["-132"]);
433 assert_eq!(tok_typ("-132"), [IntValue]);
434 assert_eq!(tok_str("132"), ["132"]);
435 assert_eq!(tok_typ("132"), [IntValue]);
436 assert_eq!(
437 tok_str("a(x: 10) { b }"),
438 ["a", "(", "x", ":", "10", ")", "{", "b", "}"]
439 );
440 assert_eq!(
441 tok_typ("a(x: 10) { b }"),
442 [
443 Name, Punctuator, Name, Punctuator, IntValue, Punctuator, Punctuator, Name,
444 Punctuator
445 ]
446 );
447 }
448
449 #[test]
451 #[should_panic]
452 fn zero_int() {
453 tok_str("01");
454 }
455 #[test]
456 #[should_panic]
457 fn zero_int4() {
458 tok_str("00001");
459 }
460 #[test]
461 #[should_panic]
462 fn minus_int() {
463 tok_str("-");
464 }
465 #[test]
466 #[should_panic]
467 fn minus_zero_int() {
468 tok_str("-01");
469 }
470 #[test]
471 #[should_panic]
472 fn minus_zero_int4() {
473 tok_str("-00001");
474 }
475 #[test]
476 #[should_panic]
477 fn letters_int() {
478 tok_str("0bbc");
479 }
480
481 #[test]
482 fn float() {
483 assert_eq!(tok_str("0.0"), ["0.0"]);
484 assert_eq!(tok_typ("0.0"), [FloatValue]);
485 assert_eq!(tok_str("-0.0"), ["-0.0"]);
486 assert_eq!(tok_typ("-0.0"), [FloatValue]);
487 assert_eq!(tok_str("-1.0"), ["-1.0"]);
488 assert_eq!(tok_typ("-1.0"), [FloatValue]);
489 assert_eq!(tok_str("-1.023"), ["-1.023"]);
490 assert_eq!(tok_typ("-1.023"), [FloatValue]);
491 assert_eq!(tok_str("-132.0"), ["-132.0"]);
492 assert_eq!(tok_typ("-132.0"), [FloatValue]);
493 assert_eq!(tok_str("132.0"), ["132.0"]);
494 assert_eq!(tok_typ("132.0"), [FloatValue]);
495 assert_eq!(tok_str("0e+0"), ["0e+0"]);
496 assert_eq!(tok_typ("0e+0"), [FloatValue]);
497 assert_eq!(tok_str("0.0e+0"), ["0.0e+0"]);
498 assert_eq!(tok_typ("0.0e+0"), [FloatValue]);
499 assert_eq!(tok_str("-0e+0"), ["-0e+0"]);
500 assert_eq!(tok_typ("-0e+0"), [FloatValue]);
501 assert_eq!(tok_str("-1e+0"), ["-1e+0"]);
502 assert_eq!(tok_typ("-1e+0"), [FloatValue]);
503 assert_eq!(tok_str("-132e+0"), ["-132e+0"]);
504 assert_eq!(tok_typ("-132e+0"), [FloatValue]);
505 assert_eq!(tok_str("132e+0"), ["132e+0"]);
506 assert_eq!(tok_typ("132e+0"), [FloatValue]);
507 assert_eq!(
508 tok_str("a(x: 10.0) { b }"),
509 ["a", "(", "x", ":", "10.0", ")", "{", "b", "}"]
510 );
511 assert_eq!(
512 tok_typ("a(x: 10.0) { b }"),
513 [
514 Name, Punctuator, Name, Punctuator, FloatValue, Punctuator, Punctuator, Name,
515 Punctuator
516 ]
517 );
518 assert_eq!(tok_str("1.23e4"), ["1.23e4"]);
519 assert_eq!(tok_typ("1.23e4"), [FloatValue]);
520 }
521
522 #[test]
524 #[should_panic]
525 fn no_int_float() {
526 tok_str(".0");
527 }
528 #[test]
529 #[should_panic]
530 fn no_int_float1() {
531 tok_str(".1");
532 }
533 #[test]
534 #[should_panic]
535 fn zero_float() {
536 tok_str("01.0");
537 }
538 #[test]
539 #[should_panic]
540 fn zero_float4() {
541 tok_str("00001.0");
542 }
543 #[test]
544 #[should_panic]
545 fn minus_float() {
546 tok_str("-.0");
547 }
548 #[test]
549 #[should_panic]
550 fn minus_zero_float() {
551 tok_str("-01.0");
552 }
553 #[test]
554 #[should_panic]
555 fn minus_zero_float4() {
556 tok_str("-00001.0");
557 }
558 #[test]
559 #[should_panic]
560 fn letters_float() {
561 tok_str("0bbc.0");
562 }
563 #[test]
564 #[should_panic]
565 fn letters_float2() {
566 tok_str("0.bbc");
567 }
568 #[test]
569 #[should_panic]
570 fn letters_float3() {
571 tok_str("0.bbce0");
572 }
573 #[test]
574 #[should_panic]
575 fn no_exp_sign_float() {
576 tok_str("0e0");
577 }
578 #[test]
579 #[should_panic]
580 fn unterminated_string() {
581 tok_str(r#""hello\""#);
582 }
583 #[test]
584 #[should_panic]
585 fn extra_unterminated_string() {
586 tok_str(r#""hello\\\""#);
587 }
588
589 #[test]
590 fn string() {
591 assert_eq!(tok_str(r#""""#), [r#""""#]);
592 assert_eq!(tok_typ(r#""""#), [StringValue]);
593 assert_eq!(tok_str(r#""hello""#), [r#""hello""#]);
594 assert_eq!(tok_str(r#""hello\\""#), [r#""hello\\""#]);
595 assert_eq!(tok_str(r#""hello\\\\""#), [r#""hello\\\\""#]);
596 assert_eq!(tok_str(r#""he\\llo""#), [r#""he\\llo""#]);
597 assert_eq!(tok_typ(r#""hello""#), [StringValue]);
598 assert_eq!(tok_str(r#""my\"quote""#), [r#""my\"quote""#]);
599 assert_eq!(tok_typ(r#""my\"quote""#), [StringValue]);
600 }
601
602 #[test]
603 fn block_string() {
604 assert_eq!(tok_str(r#""""""""#), [r#""""""""#]);
605 assert_eq!(tok_typ(r#""""""""#), [BlockString]);
606 assert_eq!(tok_str(r#""""hello""""#), [r#""""hello""""#]);
607 assert_eq!(tok_typ(r#""""hello""""#), [BlockString]);
608 assert_eq!(tok_str(r#""""my "quote" """"#), [r#""""my "quote" """"#]);
609 assert_eq!(tok_typ(r#""""my "quote" """"#), [BlockString]);
610 assert_eq!(tok_str(r#""""\"""quote" """"#), [r#""""\"""quote" """"#]);
611 assert_eq!(tok_typ(r#""""\"""quote" """"#), [BlockString]);
612 }
613}